From bc34866c54df0acdc4977cf97a1ea8f172f4ffad Mon Sep 17 00:00:00 2001 From: Wu Liliu Date: Thu, 23 Jun 2022 14:31:07 +0800 Subject: [PATCH 01/77] sw64: check processor state by user_mode(regs) Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- The `regs->ps & 8` and `regs->ps & ~IPL_MAX` don't have a clear meaning. We replace them with user_mode(regs) and !user_mode(regs), and reserve regs->ps[63:4] for future extension. Signed-off-by: Wu Liliu Signed-off-by: Gu Zitao --- arch/sw_64/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 4e95cab13daa..8c7fdeeef491 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -77,7 +77,7 @@ dik_show_code(unsigned int *pc) void die_if_kernel(char *str, struct pt_regs *regs, long err) { - if (regs->ps & 8) + if (user_mode(regs)) return; #ifdef CONFIG_SMP printk("CPU %d ", hard_smp_processor_id()); @@ -149,7 +149,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) type = inst_type & 0xffffffff; inst = inst_type >> 32; - if ((regs->ps & ~IPL_MAX) == 0 && type != 4) { + if (!user_mode(regs) && type != 4) { if (type == 1) { const unsigned int *data = (const unsigned int *) regs->pc; @@ -253,7 +253,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; } - if ((regs->ps & ~IPL_MAX) == 0) + if (!user_mode(regs)) die_if_kernel("Instruction fault", regs, type); break; -- Gitee From 7a4c401b0925baeff8dcec8feac87757424ec1fe Mon Sep 17 00:00:00 2001 From: Wu Liliu Date: Tue, 12 Jul 2022 14:49:24 +0800 Subject: [PATCH 02/77] sw64: reimplement die_if_kernel() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- In the original implementation, incorrent printing may occur when 
multiple processes die at the same time. To fix this, we use lock. Signed-off-by: Wu Liliu Signed-off-by: Gu Zitao --- arch/sw_64/kernel/proto.h | 2 +- arch/sw_64/kernel/traps.c | 50 ++++++++++++++++++++++++++------------- arch/sw_64/mm/fault.c | 4 ++-- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index 189074f8bd5c..f2b77d370da1 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -13,7 +13,7 @@ extern int ptrace_cancel_bpt(struct task_struct *child); /* traps.c */ extern void dik_show_regs(struct pt_regs *regs); -extern void die_if_kernel(char *str, struct pt_regs *regs, long err); +extern void die(char *str, struct pt_regs *regs, long err); /* timer.c */ extern void setup_timer(void); diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 8c7fdeeef491..5f2348dd087f 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -75,33 +77,47 @@ dik_show_code(unsigned int *pc) printk("\n"); } -void die_if_kernel(char *str, struct pt_regs *regs, long err) +static DEFINE_SPINLOCK(die_lock); + +void die(char *str, struct pt_regs *regs, long err) { - if (user_mode(regs)) - return; + static int die_counter; + unsigned long flags; + int ret; + + oops_enter(); + + spin_lock_irqsave(&die_lock, flags); + console_verbose(); + bust_spinlocks(1); + + pr_emerg("%s [#%d]\n", str, ++die_counter); + #ifdef CONFIG_SMP printk("CPU %d ", hard_smp_processor_id()); #endif printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); + + ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); + + print_modules(); dik_show_regs(regs); - add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); show_stack(current, NULL, KERN_EMERG); - dik_show_code((unsigned int *)regs->pc); - if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { - printk("die_if_kernel recursion detected.\n"); - 
local_irq_enable(); - while (1) - asm("nop"); - } + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irqrestore(&die_lock, flags); + oops_exit(); if (kexec_should_crash(current)) crash_kexec(regs); - + if (in_interrupt()) + panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + if (ret != NOTIFY_STOP) + do_exit(SIGSEGV); } #ifndef CONFIG_MATHEMU @@ -135,7 +151,9 @@ do_entArith(unsigned long summary, unsigned long write_mask, if (si_code == 0) return; } - die_if_kernel("Arithmetic fault", regs, 0); + + if (!user_mode(regs)) + die("Arithmetic fault", regs, 0); force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0); } @@ -161,7 +179,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP); return; } - die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), + die((type == 1 ? "Kernel Bug" : "Instruction fault"), regs, type); } @@ -254,7 +272,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) return; } if (!user_mode(regs)) - die_if_kernel("Instruction fault", regs, type); + die("Instruction fault", regs, type); break; case 3: /* FEN fault */ diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index d596fc50772d..15fd65b1b754 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -31,7 +31,7 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) } #endif -extern void die_if_kernel(char *, struct pt_regs *, long); +extern void die(char *, struct pt_regs *, long); extern void dik_show_regs(struct pt_regs *regs); void show_all_vma(void) @@ -301,7 +301,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, */ pr_alert("Unable to handle kernel paging request at virtual address %016lx\n", address); - die_if_kernel("Oops", regs, cause); + die("Oops", regs, cause); do_exit(SIGKILL); /* -- Gitee From a6f35c6bbe300fe3df3bb26a1d024358778405c9 Mon Sep 17 00:00:00 2001 From: 
Mao Minkai Date: Tue, 12 Jul 2022 16:05:32 +0800 Subject: [PATCH 03/77] sw64: adjust make rules to avoid compile error Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Fix compile errors when CONFIG_NUMA or CONFIG_SPARSEMEM is not set. Some make rules are changed to avoid potential compile errors. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 + arch/sw_64/kernel/Makefile | 6 +++++- arch/sw_64/kvm/Kconfig | 2 +- arch/sw_64/kvm/kvm-sw64.c | 8 ++++++-- arch/sw_64/mm/init.c | 2 ++ arch/sw_64/mm/physaddr.c | 1 + 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index cf2f6f00708c..f03be9ce50cc 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -97,6 +97,7 @@ config SW64 select HAVE_REGS_AND_STACK_ACCESS_API select ARCH_HAS_PTE_SPECIAL select HARDIRQS_SW_RESEND + select MEMORY_HOTPLUG_SPARSE if MEMORY_HOTPLUG config LOCKDEP_SUPPORT def_bool y diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index d4dc9e175d67..8cc09dd8fdbc 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -31,9 +31,13 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_DEBUG_FS) += segvdbg.o bindvcpu.o +obj-$(CONFIG_DEBUG_FS) += segvdbg.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o +ifeq ($(CONFIG_DEBUG_FS)$(CONFIG_NUMA),yy) +obj-y += bindvcpu.o +endif + ifndef CONFIG_PCI obj-y += pci-noop.o endif diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig index 85323b48f564..4b6201ff5dc8 100644 --- a/arch/sw_64/kvm/Kconfig +++ b/arch/sw_64/kvm/Kconfig @@ -44,7 +44,7 @@ config KVM_SW64_HOST config KVM_MEMHOTPLUG bool "Memory hotplug support for guest" - depends on KVM + depends on KVM && MEMORY_HOTPLUG help Provides memory hotplug support for SW64 guest. 
diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index de81f7efe01a..8ba7e18698b3 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -21,7 +21,9 @@ bool set_msi_flag; unsigned long sw64_kvm_last_vpn[NR_CPUS]; +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA) __read_mostly bool bind_vcpu_enabled; +#endif #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] #ifdef CONFIG_SUBARCH_C3B @@ -539,6 +541,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->arch.vcb.vpcr = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA) if (unlikely(bind_vcpu_enabled)) { int nid; unsigned long end; @@ -548,11 +551,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (pfn_to_nid(PHYS_PFN(end)) == nid) set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]); } -#else +#endif +#else /* !CONFIG_KVM_MEMHOTPLUG */ unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd); vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0); -#endif +#endif /* CONFIG_KVM_MEMHOTPLUG */ vcpu->arch.vcb.upcr = 0x7; } diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 82f2414ef7f7..e0096a0b432a 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -34,6 +34,7 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); static phys_addr_t mem_start; static phys_addr_t mem_size_limit; +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE unsigned long memory_block_size_bytes(void) { if (is_in_guest()) @@ -41,6 +42,7 @@ unsigned long memory_block_size_bytes(void) else return MIN_MEMORY_BLOCK_SIZE; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static int __init setup_mem_size(char *p) { diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c index 26769f0bf7bf..17840f4ef40b 100644 --- a/arch/sw_64/mm/physaddr.c +++ b/arch/sw_64/mm/physaddr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include unsigned long 
__phys_addr(unsigned long x) -- Gitee From 2cbcf323e8e28d4a9af15b7bd1adac35dba788b6 Mon Sep 17 00:00:00 2001 From: Lu Feifei Date: Fri, 15 Jul 2022 13:47:08 +0800 Subject: [PATCH 04/77] sw64: delete run_mode in struct cpu_desc_t Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: Lu Feifei Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hw_init.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index f60a58570a92..71b569b6ec98 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -45,7 +45,6 @@ struct cpu_desc_t { char vendor_id[16]; char model_id[64]; unsigned long frequency; - __u8 run_mode; } __randomize_layout; #define MAX_NUMSOCKETS 8 -- Gitee From 2a97fdb2d344d273e07dca5ddbcdb48e3c032972 Mon Sep 17 00:00:00 2001 From: Gu Zitao Date: Fri, 15 Jul 2022 11:06:06 +0800 Subject: [PATCH 05/77] sw64: fix compile error and warning for CONFIG_SMP=n Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: Gu Zitao --- arch/sw_64/kernel/process.c | 2 +- arch/sw_64/kernel/time.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index a75ae20205f3..2508c55311ca 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -52,7 +52,7 @@ void arch_cpu_idle(void) static void common_shutdown_1(void *generic_ptr) { struct halt_info *how = (struct halt_info *)generic_ptr; - int cpuid = smp_processor_id(); + int cpuid __maybe_unused = smp_processor_id(); /* No point in taking interrupts anymore. 
*/ local_irq_disable(); diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index 15035a01e48a..0be676c80be4 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -4,6 +4,9 @@ #include #include #include +#ifndef CONFIG_SMP +#include +#endif #include -- Gitee From c1a605c8c7af872b01603cfbe80cff4f51894ea3 Mon Sep 17 00:00:00 2001 From: Gu Zitao Date: Thu, 14 Jul 2022 10:00:10 +0800 Subject: [PATCH 06/77] sw64: add MIGHT_HAVE_PC_SERIO option to control selection of i8042 Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Since using i8042 drivers on sw64 would cause kernel crash, we add MIGHT_HAVE_PC_SERIO option to control selection of i8042. Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index f03be9ce50cc..f9a270396f5a 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -14,7 +14,6 @@ config SW64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT select ARCH_USE_CMPXCHG_LOCKREF select GENERIC_SMP_IDLE_THREAD @@ -241,6 +240,11 @@ config PLATFORM_XUELANG endchoice +config MIGHT_HAVE_PC_SERIO + bool "Use PC serio device i8042" + select ARCH_MIGHT_HAVE_PC_SERIO + default n + endmenu config LOCK_MEMB -- Gitee From 93200e986279dc27a10ff55f4fc9627a5453cca1 Mon Sep 17 00:00:00 2001 From: Zhou Xuemei Date: Mon, 1 Aug 2022 13:40:25 +0800 Subject: [PATCH 07/77] sw64: pci: consolidate PCI config entry in drivers/pci Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- According to commit eb01d42a7778 ("PCI: consolidate PCI config entry in drivers/pci"), use PCI config entry in drivers/pci instead of arch/sw64. 
Signed-off-by: Zhou Xuemei Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index f9a270396f5a..deefaf312628 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -91,6 +91,8 @@ config SW64 select ACPI_REDUCED_HARDWARE_ONLY select GENERIC_TIME_VSYSCALL select SET_FS + select HAVE_PCI + select GENERIC_PCI_IOMAP if PCI select PCI_MSI_ARCH_FALLBACKS select DMA_OPS if PCI select HAVE_REGS_AND_STACK_ACCESS_API @@ -514,17 +516,6 @@ config ISA_DMA_API bool default y -config PCI - bool "PCI Support" - depends on SW64 - select GENERIC_PCI_IOMAP - default y - help - Find out whether you have a PCI motherboard. PCI is the name of a - bus system, i.e. the way the CPU talks to the other stuff inside - your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or - VESA. If you have PCI, say Y, otherwise N. - config PCI_DOMAINS def_bool PCI @@ -729,7 +720,6 @@ config HZ int "HZ of the short timer" default 500 -source "drivers/pci/Kconfig" source "drivers/eisa/Kconfig" source "drivers/pcmcia/Kconfig" -- Gitee From 1054194d0134d8f0b8a4f5210f964b076412cca8 Mon Sep 17 00:00:00 2001 From: Zhou Xuemei Date: Mon, 27 Jun 2022 11:15:02 +0800 Subject: [PATCH 08/77] sw64: gpu: use memset_io and memcpy_toio/fromio for iomem Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GDKC -------------------------------- This commit complements commit afe00ca0c338 ("sw64: gpu: correct low-level mmio memset/memcpy direct calls"). 
Signed-off-by: Zhou Xuemei Signed-off-by: Gu Zitao --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 8 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++++++++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 42 ++++++++++++++++++++++----- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 04eaf3a8fddb..946f25f1079f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2816,7 +2816,11 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) } /* clear memory. Not sure if this is required or not */ +#if IS_ENABLED(CONFIG_SW64) + memset_io(hpd, 0, mec_hpd_size); +#else memset(hpd, 0, mec_hpd_size); +#endif amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -2926,7 +2930,11 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, u64 wb_gpu_addr; /* init the mqd struct */ +#if IS_ENABLED(CONFIG_SW64) + memset_io(mqd, 0, sizeof(struct cik_mqd)); +#else memset(mqd, 0, sizeof(struct cik_mqd)); +#endif mqd->header = 0xC0310800; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 28c4e1fe5cd4..0ac2c33a0667 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4641,8 +4641,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#endif + } /* reset ring buffer */ ring->wptr = 0; @@ -4667,12 +4672,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 
vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { #if IS_ENABLED(CONFIG_SW64) memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); #else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); #endif + } } return 0; @@ -4685,7 +4691,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#endif ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -4694,12 +4704,23 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#endif + } } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#endif + } + /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c621ebd90031..c8d1245bfc2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1978,7 +1978,11 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) return r; } +#if IS_ENABLED(CONFIG_SW64) + memset_io(hpd, 0, mec_hpd_size); +#else memset(hpd, 0, mec_hpd_size); +#endif amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -3724,10 +3728,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) { - if (IS_ENABLED(CONFIG_SW64)) - memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - else - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#else + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#endif } /* reset ring buffer */ @@ -3740,7 +3745,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#endif ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -3751,10 +3760,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) { - if (IS_ENABLED(CONFIG_SW64)) - memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - else - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#else + 
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#endif } } @@ -3768,7 +3778,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#endif ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -3778,11 +3792,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#endif + } } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#endif + } /* reset ring buffer */ ring->wptr = 0; -- Gitee From 708634b0992b0880f34a07ad37632dd5834f4c8b Mon Sep 17 00:00:00 2001 From: He Sheng Date: Tue, 19 Jul 2022 09:00:02 +0800 Subject: [PATCH 09/77] sw64: always use cpu_data and simplify it Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Make use of cpu_data no matter SMP is yes or no, and remove unused fields from it. After that, some function calls can be simplified. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hw_init.h | 7 ++----- arch/sw_64/include/asm/mmu_context.h | 5 ----- arch/sw_64/kernel/setup.c | 21 ++++++++++++++++----- arch/sw_64/kernel/smp.c | 27 ++------------------------- arch/sw_64/kernel/time.c | 6 +----- arch/sw_64/mm/fault.c | 9 +-------- 6 files changed, 22 insertions(+), 53 deletions(-) diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index 71b569b6ec98..de9f93f9b26e 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -23,11 +23,6 @@ struct cpuinfo_sw64 { int need_new_asn; int asn_lock; unsigned long ipi_count; - unsigned long prof_multiplier; - unsigned long prof_counter; - unsigned char mcheck_expected; - unsigned char mcheck_taken; - unsigned char mcheck_extra; struct cache_desc icache; /* Primary I-cache */ struct cache_desc dcache; /* Primary D or combined I/D cache */ struct cache_desc scache; /* Secondary cache */ @@ -73,6 +68,8 @@ struct memmap_entry { }; extern struct cpuinfo_sw64 cpu_data[NR_CPUS]; +extern void store_cpu_data(int cpu); + extern struct cpu_desc_t cpu_desc; extern struct socket_desc_t socket_desc[MAX_NUMSOCKETS]; extern int memmap_nr; diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index d6cd01d55712..a797673273af 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -60,12 +60,7 @@ __reload_thread(struct pcb_struct *pcb) */ #include -#ifdef CONFIG_SMP #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) -#else -extern unsigned long last_asn; -#define cpu_last_asn(cpuid) last_asn -#endif /* CONFIG_SMP */ #define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) #define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 0e93643539d3..39103e4edee4 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -28,9 +28,10 @@ 
#include #include -#include #include #include +#include +#include #include "proto.h" #include "pci_impl.h" @@ -137,6 +138,17 @@ struct screen_info screen_info = { }; EXPORT_SYMBOL(screen_info); +/* + * Move global data into per-processor storage. + */ +void store_cpu_data(int cpu) +{ + cpu_data[cpu].loops_per_jiffy = loops_per_jiffy; + cpu_data[cpu].last_asn = ASN_FIRST_VERSION; + cpu_data[cpu].need_new_asn = 0; + cpu_data[cpu].asn_lock = 0; +} + #ifdef CONFIG_KEXEC void *kexec_control_page; @@ -859,13 +871,12 @@ setup_arch(char **cmdline_p) /* Default root filesystem to sda2. */ ROOT_DEV = Root_SDA2; - /* - * Identify the flock of penguins. - */ - #ifdef CONFIG_SMP setup_smp(); +#else + store_cpu_data(0); #endif + #ifdef CONFIG_NUMA cpu_set_node(); #endif diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index fb915d166069..1004e9e3be27 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -59,29 +59,6 @@ EXPORT_SYMBOL(smp_num_cpus); #define send_sleep_interrupt(cpu) send_ipi((cpu), II_SLEEP) #define send_wakeup_interrupt(cpu) send_ipi((cpu), II_WAKE) -/* - * Called by both boot and secondaries to move global data into - * per-processor storage. - */ -static inline void __init -smp_store_cpu_info(int cpuid) -{ - cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy; - cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; - cpu_data[cpuid].need_new_asn = 0; - cpu_data[cpuid].asn_lock = 0; -} - -/* - * Ideally sets up per-cpu profiling hooks. Doesn't do much now... - */ -static inline void __init -smp_setup_percpu_timer(int cpuid) -{ - setup_timer(); - cpu_data[cpuid].prof_counter = 1; - cpu_data[cpuid].prof_multiplier = 1; -} static void __init wait_boot_cpu_to_stop(int cpuid) { @@ -128,7 +105,7 @@ void smp_callin(void) wrent(entInt, 0); /* Get our local ticker going. */ - smp_setup_percpu_timer(cpuid); + setup_timer(); /* All kernel threads share the same mm context. 
*/ mmgrab(&init_mm); @@ -298,7 +275,7 @@ void __init setup_smp(void) __cpu_to_rcid[num] = i; __rcid_to_cpu[i] = num; set_cpu_possible(num, true); - smp_store_cpu_info(num); + store_cpu_data(num); if (!cpumask_test_cpu(i, &cpu_offline)) set_cpu_present(num, true); num++; diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index 0be676c80be4..6a4c8a31465c 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -96,10 +96,6 @@ void setup_clocksource(void) } #endif /* !CONFIG_SMP */ -void __init common_init_rtc(void) -{ - setup_timer(); -} void __init time_init(void) @@ -114,7 +110,7 @@ time_init(void) setup_clocksource(); of_clk_init(NULL); /* Startup the timer source. */ - common_init_rtc(); + setup_timer(); } void calibrate_delay(void) diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index 15fd65b1b754..3255de5f0019 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -64,13 +64,7 @@ void show_all_vma(void) /* * Force a new ASN for a task. */ - -#ifndef CONFIG_SMP -unsigned long last_asn = ASN_FIRST_VERSION; -#endif - -void -__load_new_mm_context(struct mm_struct *next_mm) +void __load_new_mm_context(struct mm_struct *next_mm) { unsigned long mmc; struct pcb_struct *pcb; @@ -85,7 +79,6 @@ __load_new_mm_context(struct mm_struct *next_mm) __reload_thread(pcb); } - /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to handle_mm_fault(). -- Gitee From b786f064161203d0cf2ced1a9f86701a2a765b31 Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Tue, 19 Jul 2022 15:33:26 +0800 Subject: [PATCH 10/77] sw64: simplify do_entInt() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- It has already disabled interrupt in hmcode and it's unnecessary to disable again in do_entInt(). This way, do_entInt() is a wrapper of handle_chip_irq(), so simplify it. 
Signed-off-by: He Chuyue Signed-off-by: Gu Zitao --- arch/sw_64/chip/chip3/chip.c | 5 +++-- arch/sw_64/include/asm/irq_impl.h | 4 +--- arch/sw_64/kernel/irq_sw64.c | 9 --------- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 84ca7ffcb2ef..8697891e3930 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -656,8 +656,8 @@ static void handle_dev_int(struct pt_regs *regs) sw64_io_write(node, DEV_INT_CONFIG, config_val); } -void handle_chip_irq(unsigned long type, unsigned long vector, - unsigned long irq_arg, struct pt_regs *regs) +asmlinkage void do_entInt(unsigned long type, unsigned long vector, + unsigned long irq_arg, struct pt_regs *regs) { struct pt_regs *old_regs; @@ -738,6 +738,7 @@ void handle_chip_irq(unsigned long type, unsigned long vector, } pr_crit("PC = %016lx PS = %04lx\n", regs->pc, regs->ps); } +EXPORT_SYMBOL(do_entInt); /* * Early fix up the chip3 Root Complex settings diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h index b568efef6994..48dbc486a126 100644 --- a/arch/sw_64/include/asm/irq_impl.h +++ b/arch/sw_64/include/asm/irq_impl.h @@ -41,10 +41,8 @@ enum sw64_irq_type { extern struct irqaction timer_irqaction; extern void init_rtc_irq(irq_handler_t handler); extern void handle_irq(int irq); -extern void handle_ipi(struct pt_regs *); +extern void handle_ipi(struct pt_regs *regs); extern void __init sw64_init_irq(void); extern irqreturn_t timer_interrupt(int irq, void *dev); -extern void handle_chip_irq(unsigned long type, unsigned long vector, - unsigned long irq_arg, struct pt_regs *regs); #endif diff --git a/arch/sw_64/kernel/irq_sw64.c b/arch/sw_64/kernel/irq_sw64.c index 8ab845d153eb..88809fa531dd 100644 --- a/arch/sw_64/kernel/irq_sw64.c +++ b/arch/sw_64/kernel/irq_sw64.c @@ -9,15 +9,6 @@ #include #include -asmlinkage void -do_entInt(unsigned long type, unsigned long vector, - unsigned long irq_arg, 
struct pt_regs *regs) -{ - local_irq_disable(); - handle_chip_irq(type, vector, irq_arg, regs); -} -EXPORT_SYMBOL(do_entInt); - void __init init_IRQ(void) { -- Gitee From 8cc25723179f99a2d1f089dce8110a85799f8f8a Mon Sep 17 00:00:00 2001 From: Wang Yuanheng Date: Wed, 20 Jul 2022 15:55:57 +0800 Subject: [PATCH 11/77] sw64: fix compile errors when CONFIG_KVM=m Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Export symbol for bind_vcpu_enabled to fix compile errors when CONFIG_KVM=m. Signed-off-by: Wang Yuanheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/bindvcpu.c | 3 ++- arch/sw_64/kvm/kvm-sw64.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/kernel/bindvcpu.c b/arch/sw_64/kernel/bindvcpu.c index 611c395c144b..46617eb68b7a 100644 --- a/arch/sw_64/kernel/bindvcpu.c +++ b/arch/sw_64/kernel/bindvcpu.c @@ -11,7 +11,8 @@ #include #include -extern bool bind_vcpu_enabled; +__read_mostly bool bind_vcpu_enabled; +EXPORT_SYMBOL(bind_vcpu_enabled); static int __init bind_vcpu_init(void) { diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 8ba7e18698b3..9d209141820c 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -22,7 +22,7 @@ bool set_msi_flag; unsigned long sw64_kvm_last_vpn[NR_CPUS]; #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA) -__read_mostly bool bind_vcpu_enabled; +extern bool bind_vcpu_enabled; #endif #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] -- Gitee From 6bcb4831a4dc15a7de2b98c4141ff357111e0d9d Mon Sep 17 00:00:00 2001 From: Du Yilong Date: Mon, 25 Jul 2022 14:24:36 +0800 Subject: [PATCH 12/77] sw64: kvm: expand the number of SWVM_IRQS Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- As more devices are hot-plugged in a guest os, it will fail to respond interrupts because of insufficient interrupt resource and 
appear to be stuck. To fix this issue, expand the maximum number of irq supported to 256. Signed-off-by: Du Yilong Signed-off-by: Gu Zitao --- arch/sw_64/include/uapi/asm/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/include/uapi/asm/kvm.h b/arch/sw_64/include/uapi/asm/kvm.h index ff1b6e7f096f..126c2a1d7411 100644 --- a/arch/sw_64/include/uapi/asm/kvm.h +++ b/arch/sw_64/include/uapi/asm/kvm.h @@ -5,7 +5,7 @@ /* * KVM SW specific structures and definitions. */ -#define SWVM_IRQS 64 +#define SWVM_IRQS 256 enum SW64_KVM_IRQ { SW64_KVM_IRQ_IPI = 27, SW64_KVM_IRQ_TIMER = 9, -- Gitee From 76da209d121c95a3dbabd43d062c4d1e83ccfec0 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 26 Jul 2022 10:02:06 +0800 Subject: [PATCH 13/77] sw64: fix deep-copy_user by deep-copy_template Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNSZ -------------------------------- Some fp registers are clobbered in deep-copy_user() because this function was assumed to be used only in normal task context and to be safe to clobber caller-save fp registers. However, these assumptions have been proven wrong. Since deep-copy_user() is basically a deep-memcpy() with exception handling, a deep-copy_template() is now used to implement these two functions. Different macro defines and entry/exit code are used by deep-copy_user() and deep-memcpy(). 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/deep-copy_template.S | 299 +++++++++++++++++++++++++ arch/sw_64/lib/deep-copy_user.S | 332 +--------------------------- arch/sw_64/lib/deep-memcpy.S | 297 +------------------------ 3 files changed, 309 insertions(+), 619 deletions(-) create mode 100644 arch/sw_64/lib/deep-copy_template.S diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S new file mode 100644 index 000000000000..8355ecf8a905 --- /dev/null +++ b/arch/sw_64/lib/deep-copy_template.S @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * template for memcpy and copy_user with SIMD + * + * $16: current store address + * $17: current load address + * $18: current bytes left to copy + * + */ + +#define NC_STORE_THRESHOLD 2048 + +#define SAVE_SIMD_REGS \ + ldi $sp, -0x60($sp); \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vstd $f1, 0($23); \ + vstd $f2, 0x20($23) + +#define RESTORE_SIMD_REGS \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vldd $f1, 0($23); \ + vldd $f2, 0x20($23); \ + ldi $sp, 0x60($sp) + +#define SAVE_SIMD_U_REGS \ + ldi $sp, -0x120($sp); \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vstd $f1, 0($23); \ + vstd $f2, 0x20($23); \ + vstd $f4, 0x40($23); \ + vstd $f5, 0x60($23); \ + vstd $f10, 0x80($23); \ + vstd $f11, 0xa0($23); \ + vstd $f20, 0xc0($23); \ + vstd $f21, 0xe0($23) + +#define RESTORE_SIMD_U_REGS \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vldd $f1, 0($23); \ + vldd $f2, 0x20($23); \ + vldd $f4, 0x40($23); \ + vldd $f5, 0x60($23); \ + vldd $f10, 0x80($23); \ + vldd $f11, 0xa0($23); \ + vldd $f20, 0xc0($23); \ + vldd $f21, 0xe0($23); \ + ldi $sp, 0x120($sp) + + ble $18, $out + and $16, 7, $1 + beq $1, $dest_aligned_8 + + .align 4 +$byte_loop_head: + FIXUP_LDST( ldbu $2, 0($17) ) + subl $18, 1, $18 + addl $17, 1, $17 + FIXUP_LDST( stb $2, 0($16) ) + addl $16, 1, $16 + ble $18, $out + and $16, 7, $1 + bne $1, $byte_loop_head + 
+$dest_aligned_8: + and $17, 7, $4 + subl $18, 16, $18 + blt $18, $quad_end + subl $18, 64, $18 + blt $18, $simd_end + and $16, 31, $1 + beq $1, $dest_aligned_32 + bne $4, $quad_u_loop_head + + .align 5 +$quad_loop_head: + FIXUP_LDST( ldl $2, 0($17) ) + subl $18, 8, $18 + addl $17, 8, $17 + FIXUP_LDST( stl $2, 0($16) ) + addl $16, 8, $16 + and $16, 31, $1 + blt $18, $simd_end + beq $16, $dest_aligned_32 + br $31, $quad_loop_head + +$dest_aligned_32: + and $17, 31, $5 + bne $5, $prep_simd_u_loop + +$prep_simd_loop: + SAVE_SIMD_REGS + ldi $1, NC_STORE_THRESHOLD($31) + cmple $18, $1, $1 + bne $1, $simd_loop + + .align 5 +$simd_loop_nc: + fillcs 128 * 5($17) + FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vldd $f2, 32($17) ) + subl $18, 64, $18 + addl $17, 64, $17 + FIXUP_LDST( vstd_nc $f1, 0($16) ) + FIXUP_LDST( vstd_nc $f2, 32($16) ) + addl $16, 64, $16 + bge $18, $simd_loop_nc + memb # required for _nc store instructions + br $31, $simd_loop_end + + .align 5 +$simd_loop: + fillcs 128 * 5($17) + FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vldd $f2, 32($17) ) + subl $18, 64, $18 + addl $17, 64, $17 + FIXUP_LDST( vstd $f1, 0($16) ) + FIXUP_LDST( vstd $f2, 32($16) ) + addl $16, 64, $16 + bge $18, $simd_loop + +$simd_loop_end: + addl $18, 64, $1 + cmplt $1, 32, $1 + bne $1, $no_more_simd + FIXUP_LDST( vldd $f1, 0($17) ) + subl $18, 32, $18 + addl $17, 32, $17 + FIXUP_LDST( vstd $f1, 0($16) ) + addl $16, 32, $16 + +$no_more_simd: + RESTORE_SIMD_REGS + +$simd_end: + addl $18, 64, $18 + blt $18, $quad_end + bne $4, $prep_quad_u_loop_tail + + .align 4 +$quad_loop_tail: + FIXUP_LDST( ldl $2, 0($17) ) + FIXUP_LDST( ldl $3, 8($17) ) + subl $18, 16, $18 + addl $17, 16, $17 + FIXUP_LDST( stl $2, 0($16) ) + FIXUP_LDST( stl $3, 8($16) ) + addl $16, 16, $16 + bge $18, $quad_loop_tail + +$quad_end: + addl $18, 16, $18 + ble $18, $out + cmplt $18, 8, $1 + bne $1, $byte_loop_tail + bne $4, $move_one_quad_u + +$move_one_quad: + FIXUP_LDST( ldl $2, 0($17) ) + subl $18, 8, $18 + addl $17, 
8, $17 + FIXUP_LDST( stl $2, 0($16) ) + addl $16, 8, $16 + ble $18, $out + + .align 4 +$byte_loop_tail: + FIXUP_LDST( ldbu $2, 0($17) ) + subl $18, 1, $18 + addl $17, 1, $17 + FIXUP_LDST( stb $2, 0($16) ) + addl $16, 1, $16 + bgt $18, $byte_loop_tail + br $31, $out + +/* misaligned src and dst */ + .align 5 +$quad_u_loop_head: + FIXUP_LDST( ldl_u $2, 0($17) ) + FIXUP_LDST( ldl_u $3, 7($17) ) + subl $18, 8, $18 + addl $17, 8, $17 + extll $2, $4, $2 + exthl $3, $4, $3 + bis $2, $3, $2 + FIXUP_LDST( stl $2, 0($16) ) + addl $16, 8, $16 + blt $18, $simd_end + beq $16, $dest_aligned_32 + br $31, $quad_u_loop_head + +$prep_simd_u_loop: + SAVE_SIMD_U_REGS + andnot $17, 31, $3 + ldi $2, 256($31) + sll $5, 3, $1 + subl $2, $1, $2 + sll $1, 29, $1 + sll $2, 29, $2 + ifmovd $1, $f1 + ifmovd $2, $f2 + FIXUP_LDST( vldd $f4, 0($3) ) + ldi $1, NC_STORE_THRESHOLD($31) + cmple $18, $1, $1 + bne $1, $simd_u_loop + + .align 5 +$simd_u_loop_nc: + FIXUP_LDST( vldd $f5, 32($3) ) + fillcs 128 * 5($3) + srlow $f4, $f1, $f10 + sllow $f5, $f2, $f11 + vlogfc $f10, $f11, $f31, $f10 + FIXUP_LDST( vldd $f4, 64($3) ) + srlow $f5, $f1, $f20 + sllow $f4, $f2, $f21 + vlogfc $f20, $f21, $f31, $f20 + FIXUP_LDST( vstd_nc $f10, 0($16) ) + FIXUP_LDST( vstd_nc $f20, 32($16) ) + subl $18, 64, $18 + addl $3, 64, $3 + addl $16, 64, $16 + bge $18, $simd_u_loop_nc + memb # required for _nc store instructions + br $31, $simd_u_loop_end + + .align 5 +$simd_u_loop: + FIXUP_LDST( vldd $f5, 32($3) ) + fillcs 128 * 5($3) + srlow $f4, $f1, $f10 + sllow $f5, $f2, $f11 + vlogfc $f10, $f11, $f31, $f10 + FIXUP_LDST( vldd $f4, 64($3) ) + srlow $f5, $f1, $f20 + sllow $f4, $f2, $f21 + vlogfc $f20, $f21, $f31, $f20 + FIXUP_LDST( vstd $f10, 0($16) ) + FIXUP_LDST( vstd $f20, 32($16) ) + subl $18, 64, $18 + addl $3, 64, $3 + addl $16, 64, $16 + bge $18, $simd_u_loop + +$simd_u_loop_end: + addl $18, 64, $1 + cmplt $1, 32, $1 + bne $1, $no_more_simd_u + FIXUP_LDST( vldd $f5, 32($3) ) + srlow $f4, $f1, $f10 + sllow $f5, $f2, $f11 
+ vlogfc $f10, $f11, $f31, $f10 + FIXUP_LDST( vstd $f10, 0($16) ) + subl $18, 32, $18 + addl $3, 32, $3 + addl $16, 32, $16 + +$no_more_simd_u: + RESTORE_SIMD_U_REGS + bis $3, $5, $17 + br $31, $simd_end + +$prep_quad_u_loop_tail: + FIXUP_LDST( ldl_u $2, 0($17) ) + .align 5 +$quad_u_loop_tail: + FIXUP_LDST( ldl_u $3, 8($17) ) + extll $2, $4, $22 + exthl $3, $4, $23 + bis $22, $23, $22 + FIXUP_LDST( stl $22, 0($16) ) + FIXUP_LDST( ldl_u $2, 16($17) ) + extll $3, $4, $24 + exthl $2, $4, $25 + bis $24, $25, $24 + FIXUP_LDST( stl $24, 8($16) ) + subl $18, 16, $18 + addl $17, 16, $17 + addl $16, 16, $16 + bge $18, $quad_u_loop_tail + br $31, $quad_end + +$move_one_quad_u: + FIXUP_LDST( ldl_u $2, 0($17) ) + FIXUP_LDST( ldl_u $3, 8($17) ) + subl $18, 8, $18 + addl $17, 8, $17 + extll $2, $4, $22 + exthl $3, $4, $23 + bis $22, $23, $22 + FIXUP_LDST( stl $22, 0($16) ) + addl $16, 8, $16 + ble $18, $out + br $31, $byte_loop_tail diff --git a/arch/sw_64/lib/deep-copy_user.S b/arch/sw_64/lib/deep-copy_user.S index 631246c68bab..145e1cc6ba18 100644 --- a/arch/sw_64/lib/deep-copy_user.S +++ b/arch/sw_64/lib/deep-copy_user.S @@ -1,342 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $18 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - * - * Inputs: - * length in $18 - * destination address in $16 - * source address in $17 - * return address in $26 - * - * Outputs: - * bytes left to copy in $0 - * - * Clobbers: - * $1,$2,$3,$4,$5,$16,$17 - * - */ -/* Author: Copy_user simd version 1.1 (20190904) by Gao Xiuwu. -*/ #include /* Allow an exception for an insn; exit if we get one. */ -#define EXI(x, y...) 
\ - 99: x, ##y; \ +#define FIXUP_LDST(x, y) \ + 99: x, y; \ .section __ex_table, "a"; \ .long 99b - .; \ - ldi $31, $exitin-99b($31); \ + ldi $31, $out-99b($31); \ .previous -#define EXO(x,y...) \ - 99: x, ##y; \ - .section __ex_table, "a"; \ - .long 99b - .; \ - ldi $31, $exitout-99b($31); \ - .previous - - .set noat - .align 4 .globl __copy_user .ent __copy_user - __copy_user: .prologue 0 - subl $18, 32, $1 - beq $18, $zerolength - - and $16, 7, $3 - ble $1, $onebyteloop - beq $3, $destaligned - subl $3, 8, $3 -/* - * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) - * This loop aligns the destination a byte at a time - * We know we have at least one trip through this loop - */ -$aligndest: - EXI(ldbu $1, 0($17)) - addl $16, 1, $16 - addl $3, 1, $3 - -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO(stb $1, -1($16)) - addl $17, 1, $17 - subl $18, 1, $18 - bne $3, $aligndest - -/* - * If we fell through into here, we have a minimum of 33 - 7 bytes - * If we arrived via branch, we have a minimum of 32 bytes - */ -$destaligned: - and $17, 7, $1 - bic $18, 7, $4 - #EXI(ldl_u $3, 0($17)) - beq $1, $quadaligned - -#ifndef MISQUAD_SCALAR -$misquad: - and $16, 31, $1 - beq $1, $dest32Baligned - -$align_32B: - EXI(ldbu $1, 0($17)) - addl $17, 1, $17 - EXO(stb $1, 0($16)) - subl $18, 1, $18 - addl $16, 1, $16 - and $16, 31, $1 - beq $18, $exitout - bne $1, $align_32B - -$dest32Baligned: - ldi $2, 256($31) - andnot $17, 31, $3 - EXI(vldd $f10, 0($3)) - and $17, 31, $5 - sll $5, 3, $5 - subw $2, $5, $4 - ifmovs $5, $f15 - ifmovs $4, $f14 - - cmple $18, 63, $1 - bne $1, $misalign_tail_simd - -$misalign_body_simd: - EXI(vldd $f11, 32($3)) - fillcs 128*5($3) - - srlow $f10, $f15, $f12 - sllow $f11, $f14, $f13 - #fillde 128*5($16) - vlogfc $f12, $f13, $f31, $f12 - - EXI(vldd $f10, 64($3)) - srlow $f11, $f15, $f22 - sllow $f10, $f14, $f23 - 
vlogfc $f22, $f23, $f31, $f22 - - EXO(vstd $f12, 0($16)) - EXO(vstd $f22, 32($16)) - - addl $16, 64, $16 - addl $3, 64, $3 - subl $18, 64, $18 - - cmple $18, 63, $1 - beq $1, $misalign_body_simd - br $misalign_tail_simd - -$misalign_tail_simd: - cmple $18, 31, $1 - bne $1, $before_misalign_tail_quads - - EXI(vldd $f11, 32($3)) - srlow $f10, $f15, $f12 - sllow $f11, $f14, $f13 - vlogfc $f12, $f13, $f31, $f12 - - EXO(vstd $f12, 0($16)) - - subl $18, 32, $18 - addl $16, 32, $16 - addl $3, 32, $3 - vfmov $f11, $f10 - -$before_misalign_tail_quads: - srlow $f10, $f15, $f12 - s8subl $18, $4, $1 - ble $1, $tail_quads - - EXI(vldd $f11, 32($3)) - sllow $f11, $f14, $f13 - vlogfc $f12, $f13, $f31, $f12 - -$tail_quads: - subl $18, 8, $1 - blt $1, $less_than_8 - -$move_a_quad: - fimovd $f12, $1 - srlow $f12, 64, $f12 - - EXO(stl $1, 0($16)) - subl $18, 8, $18 - addl $16, 8, $16 - subl $18, 8, $1 - bge $1, $move_a_quad - -$less_than_8: - .align 4 - beq $18, $exitout - fimovd $f12, $1 - -$tail_bytes: - EXO(stb $1, 0($16)) - subl $18, 1, $18 - srl $1, 8, $1 - addl $16, 1, $16 - bgt $18, $tail_bytes - br $exitout -#else - -/* - * In the worst case, we've just executed an ldl_u here from 0($17) - * and we'll repeat it once if we take the branch - */ - -/* Misaligned quadword loop - not unrolled. Leave it that way. */ -$misquad: - EXI(ldl_u $2, 8($17)) - subl $4, 8, $4 - extll $3, $17, $3 - exthl $2, $17, $1 - - bis $3, $1, $1 - EXO(stl $1, 0($16)) - addl $17, 8, $17 - subl $18, 8, $18 - - addl $16, 8, $16 - bis $2, $2, $3 - bne $4, $misquad - - beq $18, $zerolength - -/* We know we have at least one trip through the byte loop */ - EXI(ldbu $2, 0($17)) - addl $16, 1, $16 - br $31, $dirtyentry -#endif -/* Do the trailing byte loop load, then hop into the store part of the loop */ - -/* - * A minimum of (33 - 7) bytes to do a quad at a time. 
- * Based upon the usage context, it's worth the effort to unroll this loop - * $18 - number of bytes to be moved - * $4 - number of bytes to move as quadwords - * $16 is current destination address - * $17 is current source address - */ - -$quadaligned: - and $16, 31, $1 - beq $1, $quadaligned_dest32Baligned - -$quadaligned_align_32B: - EXI(ldl $1, 0($17)) - addl $17, 8, $17 - EXO(stl $1, 0($16)) - subl $18, 8, $18 - subl $4, 8, $4 - addl $16, 8, $16 - and $16, 31, $1 - beq $4, $onebyteloop - bne $1, $quadaligned_align_32B - -$quadaligned_dest32Baligned: - and $17, 31, $2 - bne $2, $dest32Baligned - -$quad32Bailgned: - subl $4, 64, $2 - blt $2, $onequad - -/* - * There is a significant assumption here that the source and destination - * addresses differ by more than 32 bytes. In this particular case, a - * sparsity of registers further bounds this to be a minimum of 8 bytes. - * But if this isn't met, then the output result will be incorrect. - * Furthermore, due to a lack of available registers, we really can't - * unroll this to be an 8x loop (which would enable us to use the wh64 - * instruction memory hint instruction). - */ - -$simd_quadalign_unroll2: - fillcs 128 * 5($17) - EXI(vldd $f22, 0($17)) - EXI(vldd $f23, 32($17)) - EXO(vstd $f22, 0($16)) - EXO(vstd $f23, 32($16)) - #fillde 128 * 5($16) - subl $4, 64, $4 - subl $18, 64, $18 - addl $17, 64, $17 - addl $16, 64, $16 - subl $4, 64, $3 - bge $3, $simd_quadalign_unroll2 - bne $4, $onequad - br $31, $noquads - -$onequad: - EXI(ldl $1, 0($17)) - subl $4, 8, $4 - addl $17, 8, $17 - - EXO(stl $1, 0($16)) - subl $18, 8, $18 - addl $16, 8, $16 - bne $4, $onequad - -$noquads: - beq $18, $zerolength - -/* - * For small copies (or the tail of a larger copy), do a very simple byte loop. - * There's no point in doing a lot of complex alignment calculations to try to - * to quadword stuff for a small amount of data. 
- * $18 - remaining number of bytes left to copy - * $16 - current dest addr - * $17 - current source addr - */ - -$onebyteloop: - EXI(ldbu $2, 0($17)) - addl $16, 1, $16 - -$dirtyentry: -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO(stb $2, -1($16)) - addl $17, 1, $17 - subl $18, 1, $18 - bgt $18, $onebyteloop - -$zerolength: -$exitout: +#include "deep-copy_template.S" +$out: bis $31, $18, $0 - ret $31, ($26), 1 - -$exitin: - - /* A stupid byte-by-byte zeroing of the rest of the output - * buffer. This cures security holes by never leaving - * random kernel data around to be copied elsewhere. - */ - - mov $18, $1 - -$101: - EXO(stb $31, 0($16)) - subl $1, 1, $1 - addl $16, 1, $16 - bgt $1, $101 - - bis $31, $18, $0 - ret $31, ($26), 1 - + ret .end __copy_user EXPORT_SYMBOL(__copy_user) diff --git a/arch/sw_64/lib/deep-memcpy.S b/arch/sw_64/lib/deep-memcpy.S index 83c726d42778..c4b5bf3d26df 100644 --- a/arch/sw_64/lib/deep-memcpy.S +++ b/arch/sw_64/lib/deep-memcpy.S @@ -2,307 +2,18 @@ #include -#define NC_STORE_THRESHOLD 2048 +#define FIXUP_LDST(x, y) \ + x, y -#define SAVE_SIMD_REGS \ - ldi $sp, -0x60($sp); \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vstd $f1, 0($23); \ - vstd $f2, 0x20($23) - -#define RESTORE_SIMD_REGS \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vldd $f1, 0($23); \ - vldd $f2, 0x20($23); \ - ldi $sp, 0x60($sp) - -#define SAVE_SIMD_U_REGS \ - ldi $sp, -0x120($sp); \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vstd $f1, 0($23); \ - vstd $f2, 0x20($23); \ - vstd $f4, 0x40($23); \ - vstd $f5, 0x60($23); \ - vstd $f10, 0x80($23); \ - vstd $f11, 0xa0($23); \ - vstd $f20, 0xc0($23); \ - vstd $f21, 0xe0($23) - -#define RESTORE_SIMD_U_REGS \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vldd $f1, 0($23); \ - vldd $f2, 0x20($23); \ - vldd $f4, 0x40($23); \ - vldd $f5, 0x60($23); \ - vldd $f10, 0x80($23); \ - vldd 
$f11, 0xa0($23); \ - vldd $f20, 0xc0($23); \ - vldd $f21, 0xe0($23); \ - ldi $sp, 0x120($sp) - - .set noat - .align 4 .globl memcpy .ent memcpy memcpy: .frame $30, 0, $26, 0 .prologue 0 - mov $16, $0 - ble $18, $out - and $16, 7, $1 - beq $1, $dest_aligned_8 - - .align 4 -$byte_loop_head: - ldbu $2, 0($17) - subl $18, 1, $18 - addl $17, 1, $17 - stb $2, 0($16) - addl $16, 1, $16 - ble $18, $out - and $16, 7, $1 - bne $1, $byte_loop_head - -$dest_aligned_8: - and $17, 7, $4 - subl $18, 16, $18 - blt $18, $quad_end - subl $18, 64, $18 - blt $18, $simd_end - and $16, 31, $1 - beq $1, $dest_aligned_32 - bne $4, $quad_u_loop_head - - .align 5 -$quad_loop_head: - ldl $2, 0($17) - subl $18, 8, $18 - addl $17, 8, $17 - stl $2, 0($16) - addl $16, 8, $16 - and $16, 31, $1 - blt $18, $simd_end - beq $16, $dest_aligned_32 - br $31, $quad_loop_head - -$dest_aligned_32: - and $17, 31, $5 - bne $5, $prep_simd_u_loop - -$prep_simd_loop: - SAVE_SIMD_REGS - ldi $1, NC_STORE_THRESHOLD($31) - cmple $18, $1, $1 - bne $1, $simd_loop - - .align 5 -$simd_loop_nc: - fillcs 128 * 5($17) - vldd $f1, 0($17) - vldd $f2, 32($17) - subl $18, 64, $18 - addl $17, 64, $17 - vstd_nc $f1, 0($16) - vstd_nc $f2, 32($16) - addl $16, 64, $16 - bge $18, $simd_loop_nc - memb # required for _nc store instructions - br $31, $simd_loop_end - - .align 5 -$simd_loop: - fillcs 128 * 5($17) - vldd $f1, 0($17) - vldd $f2, 32($17) - subl $18, 64, $18 - addl $17, 64, $17 - vstd $f1, 0($16) - vstd $f2, 32($16) - addl $16, 64, $16 - bge $18, $simd_loop - -$simd_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 - bne $1, $no_more_simd - vldd $f1, 0($17) - subl $18, 32, $18 - addl $17, 32, $17 - vstd $f1, 0($16) - addl $16, 32, $16 - -$no_more_simd: - RESTORE_SIMD_REGS - -$simd_end: - addl $18, 64, $18 - blt $18, $quad_end - bne $4, $prep_quad_u_loop_tail - - .align 4 -$quad_loop_tail: - ldl $2, 0($17) - ldl $3, 8($17) - subl $18, 16, $18 - addl $17, 16, $17 - stl $2, 0($16) - stl $3, 8($16) - addl $16, 16, $16 - bge $18, 
$quad_loop_tail - -$quad_end: - addl $18, 16, $18 - ble $18, $out - cmplt $18, 8, $1 - bne $1, $byte_loop_tail - bne $4, $move_one_quad_u - -$move_one_quad: - ldl $2, 0($17) - subl $18, 8, $18 - addl $17, 8, $17 - stl $2, 0($16) - addl $16, 8, $16 - ble $18, $out - - .align 4 -$byte_loop_tail: - ldbu $2, 0($17) - subl $18, 1, $18 - addl $17, 1, $17 - stb $2, 0($16) - addl $16, 1, $16 - bgt $18, $byte_loop_tail - +#include "deep-copy_template.S" $out: - ret $31, ($26), 1 - - - - .align 5 -$quad_u_loop_head: - ldl_u $2, 0($17) - ldl_u $3, 7($17) - subl $18, 8, $18 - addl $17, 8, $17 - extll $2, $4, $2 - exthl $3, $4, $3 - bis $2, $3, $2 - stl $2, 0($16) - addl $16, 8, $16 - blt $18, $simd_end - beq $16, $dest_aligned_32 - br $31, $quad_u_loop_head - -$prep_simd_u_loop: - SAVE_SIMD_U_REGS - andnot $17, 31, $3 - ldi $2, 256($31) - sll $5, 3, $1 - subl $2, $1, $2 - sll $1, 29, $1 - sll $2, 29, $2 - ifmovd $1, $f1 - ifmovd $2, $f2 - vldd $f4, 0($3) - ldi $1, NC_STORE_THRESHOLD($31) - cmple $18, $1, $1 - bne $1, $simd_u_loop - - .align 5 -$simd_u_loop_nc: - vldd $f5, 32($3) - fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - vldd $f4, 64($3) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - vstd_nc $f10, 0($16) - vstd_nc $f20, 32($16) - subl $18, 64, $18 - addl $3, 64, $3 - addl $16, 64, $16 - bge $18, $simd_u_loop_nc - memb # required for _nc store instructions - br $31, $simd_u_loop_end - - .align 5 -$simd_u_loop: - vldd $f5, 32($3) - fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - vldd $f4, 64($3) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - vstd $f10, 0($16) - vstd $f20, 32($16) - subl $18, 64, $18 - addl $3, 64, $3 - addl $16, 64, $16 - bge $18, $simd_u_loop - -$simd_u_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 - bne $1, $no_more_simd_u - vldd $f5, 32($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 
- vlogfc $f10, $f11, $f31, $f10 - vstd $f10, 0($16) - subl $18, 32, $18 - addl $3, 32, $3 - addl $16, 32, $16 - -$no_more_simd_u: - RESTORE_SIMD_U_REGS - bis $3, $5, $17 - br $31, $simd_end - -$prep_quad_u_loop_tail: - ldl_u $2, 0($17) - .align 5 -$quad_u_loop_tail: - ldl_u $3, 8($17) - extll $2, $4, $22 - exthl $3, $4, $23 - bis $22, $23, $22 - stl $22, 0($16) - ldl_u $2, 16($17) - extll $3, $4, $24 - exthl $2, $4, $25 - bis $24, $25, $24 - stl $24, 8($16) - subl $18, 16, $18 - addl $17, 16, $17 - addl $16, 16, $16 - bge $18, $quad_u_loop_tail - br $31, $quad_end - -$move_one_quad_u: - ldl_u $2, 0($17) - ldl_u $3, 8($17) - subl $18, 8, $18 - addl $17, 8, $17 - extll $2, $4, $22 - exthl $3, $4, $23 - bis $22, $23, $22 - stl $22, 0($16) - addl $16, 8, $16 - ble $18, $out - br $31, $byte_loop_tail - + ret .end memcpy EXPORT_SYMBOL(memcpy) __memcpy = memcpy -- Gitee From dcc9d3e58fd2c35d6648c42cd1498e670615669d Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 26 Jul 2022 10:03:58 +0800 Subject: [PATCH 14/77] sw64: remove context check in csum_partial_copy_from_user() Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNSZ -------------------------------- This reverts commit ce6455155eeacae6557fe0142e69a6c288ac970e. In previous patch, we fixed deep-copy_user(). It's now safe to use it in any context, so no need to check. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/csum_partial_copy.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c index 742dd63cdb70..1a8c18757e09 100644 --- a/arch/sw_64/lib/csum_partial_copy.c +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -61,10 +61,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - if (likely(!uaccess_kernel())) - err = __copy_from_user(dst, src, len + 8); - else - memcpy(dst, src, len + 8); + err = __copy_from_user(dst, src, len+8); while (len > 0) { word = *dst; @@ -93,10 +90,7 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - if (likely(!uaccess_kernel())) - err = __copy_from_user(dst, src, len + 8); - else - memcpy(dst, src, len + 8); + err = __copy_from_user(dst, src, len+8); dst = (unsigned long *)((unsigned long)dst & (~7UL)); word = *dst; -- Gitee From 08a40a7de40b27a02e867b4f561c1e46addaec9d Mon Sep 17 00:00:00 2001 From: Wu Liliu Date: Tue, 26 Jul 2022 17:41:11 +0800 Subject: [PATCH 15/77] sw64: rename dik_* methods Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Since die_if_kernel() has been removed, the prefix `dik_` is not appropriate. Remove this prefix and simplify the implementation if there is no one to forward the exception to. 
Signed-off-by: Wu Liliu Signed-off-by: Gu Zitao --- arch/sw_64/kernel/process.c | 11 ----------- arch/sw_64/kernel/proto.h | 2 +- arch/sw_64/kernel/ptrace.c | 2 +- arch/sw_64/kernel/traps.c | 31 +++++++------------------------ arch/sw_64/mm/fault.c | 4 ++-- 5 files changed, 11 insertions(+), 39 deletions(-) diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index 2508c55311ca..4d223a7255bb 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -102,17 +102,6 @@ void machine_power_off(void) } -/* Used by sysrq-p, among others. I don't believe r9-r15 are ever - * saved in the context it's used. - */ - -void -show_regs(struct pt_regs *regs) -{ - show_regs_print_info(KERN_DEFAULT); - dik_show_regs(regs); -} - /* * Re-start a thread when doing execve() */ diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index f2b77d370da1..f84629ec05ea 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -12,7 +12,7 @@ extern int ptrace_set_bpt(struct task_struct *child); extern int ptrace_cancel_bpt(struct task_struct *child); /* traps.c */ -extern void dik_show_regs(struct pt_regs *regs); +extern void show_regs(struct pt_regs *regs); extern void die(char *str, struct pt_regs *regs, long err); /* timer.c */ diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index bdbd0d97a130..064296711b2f 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -487,7 +487,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r case MMCSR__DA_MATCH: case MMCSR__DV_MATCH: case MMCSR__DAV_MATCH: - dik_show_regs(regs); + show_regs(regs); if (!(current->ptrace & PT_PTRACED)) { printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm); diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 5f2348dd087f..9915160d95d2 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -31,8 +31,10 @@ #include "proto.h" 
-void dik_show_regs(struct pt_regs *regs) +void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); printk("pc is at %pSR\n", (void *)regs->pc); @@ -62,8 +64,7 @@ void dik_show_regs(struct pt_regs *regs) printk("gp = %016lx sp = %p\n", regs->gp, regs+1); } -static void -dik_show_code(unsigned int *pc) +static void show_code(unsigned int *pc) { long i; unsigned int insn; @@ -93,15 +94,11 @@ void die(char *str, struct pt_regs *regs, long err) pr_emerg("%s [#%d]\n", str, ++die_counter); -#ifdef CONFIG_SMP - printk("CPU %d ", hard_smp_processor_id()); -#endif - printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); - ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); print_modules(); - dik_show_regs(regs); + show_regs(regs); + show_code((unsigned int *)regs->pc); show_stack(current, NULL, KERN_EMERG); bust_spinlocks(0); @@ -508,21 +505,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, * Since the registers are in a weird format, dump them ourselves. 
*/ - printk("%s(%d): unhandled unaligned exception\n", - current->comm, task_pid_nr(current)); - - dik_show_regs(regs); - dik_show_code((unsigned int *)pc); - show_stack(current, NULL, KERN_EMERG); - - if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { - printk("die_if_kernel recursion detected.\n"); - local_irq_enable(); - while (1) - asm("nop"); - } - do_exit(SIGSEGV); - + die("Unhandled unaligned exception", regs, error); } /* diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index 3255de5f0019..541e30b4b84c 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -32,7 +32,7 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) #endif extern void die(char *, struct pt_regs *, long); -extern void dik_show_regs(struct pt_regs *regs); +extern void show_regs(struct pt_regs *regs); void show_all_vma(void) { @@ -325,7 +325,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (unlikely(segv_debug_enabled)) { pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", current->pid, cause, mmcsr, address, regs->pc); - dik_show_regs(regs); + show_regs(regs); show_all_vma(); } -- Gitee From cfb6c2c687c58e39c4031e069be2184272395757 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Tue, 26 Jul 2022 13:41:18 +0800 Subject: [PATCH 16/77] sw64: rename TLB invalidate helpers Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Rename these helpers and their usages according to SW64 architecture manuals. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 24 ++++++++++++++++++++---- arch/sw_64/include/asm/mmu_context.h | 4 ++-- arch/sw_64/include/asm/tlbflush.h | 6 +++--- arch/sw_64/kernel/head.S | 2 +- arch/sw_64/kernel/hibernate.c | 3 +-- arch/sw_64/kernel/machine_kexec.c | 3 --- arch/sw_64/kernel/smp.c | 2 +- arch/sw_64/mm/init.c | 2 +- 8 files changed, 29 insertions(+), 17 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 310cc61a5a34..0e609e9cade7 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -193,12 +193,28 @@ __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); }) #define tbi(x, y) __tbi(x, __r17 = (y), "1" (__r17)) -#define tbisi(x) __tbi(1, __r17 = (x), "1" (__r17)) -#define tbisd(x) __tbi(2, __r17 = (x), "1" (__r17)) -#define tbis(x) __tbi(3, __r17 = (x), "1" (__r17)) -#define tbiap() __tbi(-1, /* no second argument */) + +/* Invalidate all TLB, only used by hypervisor */ #define tbia() __tbi(-2, /* no second argument */) +/* Invalidate TLB for all processes with currnet VPN */ +#define tbivp() __tbi(-1, /* no second argument */) + +/* Invalidate all TLB with current VPN */ +#define tbiv() __tbi(0, /* no second argument */) + +/* Invalidate ITLB of addr with current UPN and VPN */ +#define tbisi(addr) __tbi(1, __r17 = (addr), "1" (__r17)) + +/* Invalidate DTLB of addr with current UPN and VPN */ +#define tbisd(addr) __tbi(2, __r17 = (addr), "1" (__r17)) + +/* Invalidate TLB of addr with current UPN and VPN */ +#define tbis(addr) __tbi(3, __r17 = (addr), "1" (__r17)) + +/* Invalidate all user TLB with current UPN and VPN */ +#define tbiu() __tbi(4, /* no second argument */) + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index a797673273af..762ffbf72dbb 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ 
b/arch/sw_64/include/asm/mmu_context.h @@ -72,7 +72,7 @@ __reload_thread(struct pcb_struct *pcb) * need to do "p->mm->context = 0". * * If we need more ASN's than the processor has, we invalidate the old - * user TLB's (tbiap()) and start a new ASN version. That will automatically + * user TLB's (tbivp()) and start a new ASN version. That will automatically * force a new asn for any other processes the next time they want to * run. */ @@ -84,7 +84,7 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) unsigned long next = asn + 1; if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK) { - tbiap(); + tbivp(); next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; } cpu_last_asn(cpu) = next; diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index 7805bb287257..1d6a0db2a054 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -28,11 +28,11 @@ static inline void flush_tlb_current_page(struct mm_struct *mm, unsigned long addr) { if (vma->vm_flags & VM_EXEC) { - tbi(3, addr); + tbis(addr); if (icache_is_vivt_no_ictag()) imb(); } else - tbi(2, addr); + tbisd(addr); } @@ -65,7 +65,7 @@ static inline void flush_tlb_other(struct mm_struct *mm) */ static inline void flush_tlb_all(void) { - tbia(); + tbiv(); } /* Flush a specified user mapping. */ diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S index 5fff0f33c9e2..e43499d18357 100644 --- a/arch/sw_64/kernel/head.S +++ b/arch/sw_64/kernel/head.S @@ -71,7 +71,7 @@ __smp_callin: br $27, 2f # we copy this from above "br $27 1f" 2: ldgp $29, 0($27) # First order of business, load the GP. 
- subl $31, 2, $16 + bis $31, $31, $16 # invalidate all TLB with current VPN sys_call HMC_tbi sys_call HMC_whami # Get hard cid diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c index 33426e3ed305..799706db5b94 100644 --- a/arch/sw_64/kernel/hibernate.c +++ b/arch/sw_64/kernel/hibernate.c @@ -27,8 +27,7 @@ void restore_processor_state(void) wrpcbb(vcb->pcbb); wrptbr(vcb->ptbr); sflush(); - tbia(); - imb(); + tbiv(); } int swsusp_arch_resume(void) diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c index c9ca7a728bd4..950998476cda 100644 --- a/arch/sw_64/kernel/machine_kexec.c +++ b/arch/sw_64/kernel/machine_kexec.c @@ -204,9 +204,6 @@ void machine_kexec(struct kimage *image) pr_info("Will call new kernel at %08lx\n", image->start); pr_info("Bye ...\n"); - //flush_cache_all(); - //sflush(); - //tbia(); smp_wmb(); ((noretfun_t) reboot_code_buffer)(); } diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 1004e9e3be27..1c534b22dc26 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -511,7 +511,7 @@ EXPORT_SYMBOL(smp_imb); static void ipi_flush_tlb_all(void *ignored) { - tbia(); + tbiv(); } void flush_tlb_all(void) diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index e0096a0b432a..6ed1ef8e020c 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -104,7 +104,7 @@ switch_to_system_map(void) init_thread_info.pcb.ptbr = newptbr; init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ original_pcb_ptr = load_PCB(&init_thread_info.pcb); - tbia(); + tbiv(); } void __init callback_init(void) -- Gitee From 2d37cdf1136b3ca23d6ef59fb838132ebff5ef95 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Tue, 26 Jul 2022 14:56:02 +0800 Subject: [PATCH 17/77] sw64: simplify icache flush interfaces Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- SW64 architecture manuals say that icache of 
C3A/C3B is VIVT with ICtag which is mapped to physical memory. That means icache doesn't need to be flushed when instruction pages change. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/cacheflush.h | 92 ++--------------------------- arch/sw_64/include/asm/hw_init.h | 8 --- arch/sw_64/include/asm/tlbflush.h | 6 +- arch/sw_64/kernel/smp.c | 57 ------------------ 4 files changed, 7 insertions(+), 156 deletions(-) diff --git a/arch/sw_64/include/asm/cacheflush.h b/arch/sw_64/include/asm/cacheflush.h index 985161896f71..536b0b7b78bd 100644 --- a/arch/sw_64/include/asm/cacheflush.h +++ b/arch/sw_64/include/asm/cacheflush.h @@ -2,94 +2,12 @@ #ifndef _ASM_SW64_CACHEFLUSH_H #define _ASM_SW64_CACHEFLUSH_H -#include -#include - -/* Caches aren't brain-dead on the sw64. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define flush_dcache_page(page) do { } while (0) -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) -#define flush_cache_vmap(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) do { } while (0) - -/* Note that the following two definitions are _highly_ dependent - * on the contexts in which they are used in the kernel. I personally - * think it is criminal how loosely defined these macros are. +/* + * DCache: PIPT + * ICache: + * - C3A/B is VIVT with ICTAG, support coherence. + * - C4 is VIPT */ - -/* We need to flush the kernel's icache after loading modules. The - * only other use of this macro is in load_aout_interp which is not - * used on sw64. - - * Note that this definition should *not* be used for userspace - * icache flushing. While functional, it is _way_ overkill. 
The - * icache is tagged with ASNs and it suffices to allocate a new ASN - * for the process. - */ -#ifndef CONFIG_SMP -static inline void -flush_icache_range(unsigned long start, unsigned long end) -{ - if (icache_is_vivt_no_ictag()) - imb(); -} -#define flush_icache_range flush_icache_range -#else -extern void smp_imb(void); -static inline void -flush_icache_range(unsigned long start, unsigned long end) -{ - if (icache_is_vivt_no_ictag()) - smp_imb(); -} -#define flush_icache_range flush_icache_range -#endif - -/* We need to flush the userspace icache after setting breakpoints in - * ptrace. - - * Instead of indiscriminately using imb, take advantage of the fact - * that icache entries are tagged with the ASN and load a new mm context. - */ -/* ??? Ought to use this in arch/sw_64/kernel/signal.c too. */ - -#ifndef CONFIG_SMP -#include - -extern void __load_new_mm_context(struct mm_struct *); -static inline void -flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - if ((vma->vm_flags & VM_EXEC) && icache_is_vivt_no_ictag()) - imb(); -} -#define flush_icache_user_page flush_icache_user_page -#else -extern void flush_icache_user_page(struct vm_area_struct *vma, - struct page *page, - unsigned long addr, int len); -#define flush_icache_user_page flush_icache_user_page -#endif - -/* This is used only in __do_fault and do_swap_page. 
*/ -#define flush_icache_page(vma, page) \ - flush_icache_user_page((vma), (page), 0, 0) - -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ -do { \ - memcpy(dst, src, len); \ - flush_icache_user_page(vma, page, vaddr, len); \ -} while (0) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - memcpy(dst, src, len) - #include #endif /* _ASM_SW64_CACHEFLUSH_H */ diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index de9f93f9b26e..545e9a99a49c 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -85,14 +85,6 @@ static inline unsigned long get_cpu_freq(void) return cpu_desc.frequency; } -static inline bool icache_is_vivt_no_ictag(void) -{ - /* - * Icache of C3B is vivt with ICtag. C4 will be vipt. - */ - return (cpu_desc.arch_var == 0x3 && cpu_desc.arch_rev == 0x1); -} - #define EMUL_FLAG (0x1UL << 63) #define MMSIZE_MASK (EMUL_FLAG - 1) diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index 1d6a0db2a054..e508a4d66d37 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -27,11 +27,9 @@ static inline void flush_tlb_current_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { - if (vma->vm_flags & VM_EXEC) { + if (vma->vm_flags & VM_EXEC) tbis(addr); - if (icache_is_vivt_no_ictag()) - imb(); - } else + else tbisd(addr); } diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 1c534b22dc26..b95873a2696d 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -496,19 +496,6 @@ void native_send_call_func_single_ipi(int cpu) send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } -static void -ipi_imb(void *ignored) -{ - imb(); -} - -void smp_imb(void) -{ - /* Must wait other processors to flush their icache before continue. 
*/ - on_each_cpu(ipi_imb, NULL, 1); -} -EXPORT_SYMBOL(smp_imb); - static void ipi_flush_tlb_all(void *ignored) { tbiv(); @@ -628,50 +615,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l } EXPORT_SYMBOL(flush_tlb_range); -static void ipi_flush_icache_page(void *x) -{ - struct mm_struct *mm = (struct mm_struct *) x; - - if (mm == current->mm) - __load_new_mm_context(mm); - else - flush_tlb_other(mm); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - struct mm_struct *mm = vma->vm_mm; - - if ((vma->vm_flags & VM_EXEC) == 0) - return; - if (!icache_is_vivt_no_ictag()) - return; - - preempt_disable(); - - if (mm == current->mm) { - __load_new_mm_context(mm); - if (atomic_read(&mm->mm_users) == 1) { - int cpu, this_cpu = smp_processor_id(); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu) || cpu == this_cpu) - continue; - if (mm->context.asid[cpu]) - mm->context.asid[cpu] = 0; - } - preempt_enable(); - return; - } - } else - flush_tlb_other(mm); - - smp_call_function(ipi_flush_icache_page, mm, 1); - - preempt_enable(); -} - int native_cpu_disable(void) { int cpu = smp_processor_id(); -- Gitee From c558e217d0b80a50f7ea81c678f534907e736b99 Mon Sep 17 00:00:00 2001 From: Zhou Xuemei Date: Tue, 9 Aug 2022 08:30:54 +0800 Subject: [PATCH 18/77] =?UTF-8?q?sw64:=20iommu:=20allow=C2=A0unlimited?= =?UTF-8?q?=C2=A0minimum=C2=A0value=C2=A0of=C2=A0iova=C2=A0in=C2=A0unmanag?= =?UTF-8?q?ed=C2=A0domain?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- IO virtual address assignment in unmanaged domain is completely handed over to the device owner, so there is no need to set the IOVA baseline. 
Signed-off-by: Zhou Xuemei Signed-off-by: Gu Zitao --- drivers/iommu/sw64/sunway_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/sw64/sunway_iommu.c b/drivers/iommu/sw64/sunway_iommu.c index b6c8f1272d28..580619c6a571 100644 --- a/drivers/iommu/sw64/sunway_iommu.c +++ b/drivers/iommu/sw64/sunway_iommu.c @@ -1382,7 +1382,7 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned type) sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - sdomain->domain.geometry.aperture_start = SW64_DMA_START; + sdomain->domain.geometry.aperture_start = 0ULL; sdomain->domain.geometry.aperture_end = (~0ULL); sdomain->domain.geometry.force_aperture = true; sdomain->type = IOMMU_DOMAIN_UNMANAGED; -- Gitee From 2e88067254f5cc046c2bbac843a41541f31f90c1 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Mon, 27 Jun 2022 15:02:03 +0800 Subject: [PATCH 19/77] sw64: force context reload without hmcall swpctx Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- The hmcall swpctx is a bit heavyweight for context reload, and we have provided some other hmcalls to do this: - wrfen: enable fpu. - wrptbr: update CSR:PTBR only. - load_mm: force update CSR:PTBR and CSR:UPN to switch mm. For smp setup, hmcall swpctx is also heavy because boot CPU only has to prepare stack pointer for secondary CPUs. So we remove the tidle_pcb[], and take tidle_ksp[] to hold target idle task's ksp. Each secondary CPU loads its ksp and update CSR:PTBR at boot time. With this patch, most hmcall swpctx invocations are removed. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 5 +++++ arch/sw_64/include/asm/mmu_context.h | 31 ++++------------------------ arch/sw_64/kernel/head.S | 11 ++++------ arch/sw_64/kernel/process.c | 1 - arch/sw_64/kernel/smp.c | 20 +++++------------- arch/sw_64/kernel/traps.c | 2 +- arch/sw_64/mm/fault.c | 2 +- arch/sw_64/mm/init.c | 11 +--------- 8 files changed, 21 insertions(+), 62 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 0e609e9cade7..30afc542039c 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -62,6 +62,11 @@ extern void halt(void) __attribute__((noreturn)); #define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) +#define fpu_enable() \ +{ \ + __asm__ __volatile__("sys_call %0" : : "i" (HMC_wrfen));\ +} + #define imb() \ __asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory") diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index 762ffbf72dbb..3e75f34895bf 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -14,37 +14,14 @@ /* * Force a context reload. This is needed when we change the page - * table pointer or when we update the ASN of the current process. + * table pointer or when we update the ASID of the current process. + * + * CSR:UPN holds ASID and CSR:PTBR holds page table pointer. */ - -static inline unsigned long -__reload_thread(struct pcb_struct *pcb) -{ - register unsigned long a0 __asm__("$16"); - register unsigned long v0 __asm__("$0"); - - a0 = virt_to_phys(pcb); - __asm__ __volatile__( - "sys_call %2 #__reload_thread" - : "=r"(v0), "=r"(a0) - : "i"(HMC_swpctx), "r"(a0) - : "$1", "$22", "$23", "$24", "$25"); - - return v0; -} - #define load_asn_ptbr load_mm /* - * The maximum ASN's the processor supports. 
- * - * If a processor implements address space numbers (ASNs), and the old - * PTE has the Address Space Match (ASM) bit clear (ASNs in use) and - * the Valid bit set, then entries can also effectively be made coherent - * by assigning a new, unused ASN to the currently running process and - * not reusing the previous ASN before calling the appropriate HMcode - * routine to invalidate the translation buffer (TB). - * + * The maximum ASN's the processor supports. ASN is called ASID too. */ #ifdef CONFIG_SUBARCH_C3B diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S index e43499d18357..4b2db3891dcb 100644 --- a/arch/sw_64/kernel/head.S +++ b/arch/sw_64/kernel/head.S @@ -76,17 +76,14 @@ __smp_callin: sys_call HMC_whami # Get hard cid - sll $0, 2, $0 ldi $1, __rcid_to_cpu - addl $1, $0, $1 + s4addl $0, $1, $1 ldw $0, 0($1) # Get logical cpu number - sll $0, 3, $0 - ldi $1, tidle_pcb - addl $1, $0, $1 - ldl $16, 0($1) # Get PCBB of idle thread + ldi $2, tidle_ksp + s8addl $0, $2, $2 + ldl $30, 0($2) # Get ksp of idle thread - sys_call HMC_swpctx ldi $8, 0x3fff # Find "current". 
bic $30, $8, $8 diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index 4d223a7255bb..f5525a194072 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -156,7 +156,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs = current_pt_regs(); - childti->pcb.ksp = (unsigned long) childregs; childti->pcb.flags = 7; /* set FEN, clear everything else */ p->thread.sp = (unsigned long) childregs; diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index b95873a2696d..43d803c49545 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -34,7 +34,7 @@ EXPORT_SYMBOL(__cpu_to_rcid); int __rcid_to_cpu[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__rcid_to_cpu); -unsigned long tidle_pcb[NR_CPUS]; +void *tidle_ksp[NR_CPUS]; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -110,6 +110,8 @@ void smp_callin(void) /* All kernel threads share the same mm context. */ mmgrab(&init_mm); current->active_mm = &init_mm; + /* update csr:ptbr */ + wrptbr(PFN_PHYS(current_thread_info()->pcb.ptbr)); /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); @@ -153,23 +155,11 @@ static inline void set_secondary_ready(int cpuid) */ static int secondary_cpu_start(int cpuid, struct task_struct *idle) { - struct pcb_struct *ipcb; unsigned long timeout; - - ipcb = &task_thread_info(idle)->pcb; - /* - * Initialize the idle's PCB to something just good enough for - * us to get started. Immediately after starting, we'll swpctx - * to the target idle task's pcb. Reuse the stack in the mean - * time. Precalculate the target PCBB. + * Precalculate the target ksp. 
*/ - ipcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; - ipcb->usp = 0; - ipcb->pcc = 0; - ipcb->asn = 0; - tidle_pcb[cpuid] = ipcb->unique = virt_to_phys(ipcb); - ipcb->dv_match = ipcb->dv_mask = 0; + tidle_ksp[cpuid] = idle->stack + sizeof(union thread_union) - 16; DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state); diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 9915160d95d2..ff32330d73ab 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -284,7 +284,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) * with it. */ current_thread_info()->pcb.flags |= 1; - __reload_thread(¤t_thread_info()->pcb); + fpu_enable(); return; case 5: /* illoc */ diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index 541e30b4b84c..b7fc2c816698 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -76,7 +76,7 @@ void __load_new_mm_context(struct mm_struct *next_mm) pcb->asn = mmc & HARDWARE_ASN_MASK; pcb->ptbr = virt_to_pfn(next_mm->pgd); - __reload_thread(pcb); + load_asn_ptbr(pcb->asn, pcb->ptbr); } /* diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 6ed1ef8e020c..ef8e45ece0ff 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -77,21 +77,12 @@ pgd_alloc(struct mm_struct *mm) return ret; } -static inline unsigned long -load_PCB(struct pcb_struct *pcb) -{ - register unsigned long sp __asm__("$30"); - pcb->ksp = sp; - return __reload_thread(pcb); -} - /* Set up initial PCB, VPTB, and other such nicities. */ static inline void switch_to_system_map(void) { unsigned long newptbr; - unsigned long original_pcb_ptr; /* * Initialize the kernel's page tables. Linux puts the vptb in @@ -103,7 +94,7 @@ switch_to_system_map(void) /* Also set up the real kernel PCB while we're at it. 
*/ init_thread_info.pcb.ptbr = newptbr; init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ - original_pcb_ptr = load_PCB(&init_thread_info.pcb); + wrptbr(PFN_PHYS(newptbr)); tbiv(); } -- Gitee From 14e852c089e0a6beaba933b55bd1a158da57a059 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Tue, 28 Jun 2022 14:03:13 +0800 Subject: [PATCH 20/77] sw64: remove hmcall swpctx from context switch Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- It used to switch context in hmcall swpctx, which is not flexible enough. We try to make it happen in kernel without hmcall swpctx. To achieve this end, not only fpu state but also usp and tls pointer have to be saved and restored. For process creation and hibernation, the current tls pointer has to be read from CSR:TID as it may be out- of-sync with the saved value. For suspend, it's better to be saved and restored because there is no guarantee that WAKEUP interrupt will be used. To do this, we add hmcall fixup to access CSR:TID and obtain backward compatibility for user. Besides, the old `unique` is too obscure to be understood. To make it clear, we rename it to `tp` which is short for tls pointer, and then retain HMC_rdunique/wrunique as alias of HMC_rdtp/wrtp. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 14 ++++---- arch/sw_64/include/asm/mmu_context.h | 20 ++++------- arch/sw_64/include/asm/switch_to.h | 31 ++++++++++------ arch/sw_64/include/asm/thread_info.h | 10 +++++- arch/sw_64/include/asm/vcpu.h | 2 +- arch/sw_64/include/uapi/asm/hmcall.h | 6 ++-- arch/sw_64/include/uapi/asm/ptrace.h | 3 +- arch/sw_64/kernel/Makefile | 2 +- arch/sw_64/kernel/early_init.c | 1 + arch/sw_64/kernel/entry.S | 52 ++++++++++++--------------- arch/sw_64/kernel/hibernate.c | 4 +-- arch/sw_64/kernel/hmcall.c | 54 ++++++++++++++++++++++++++++ arch/sw_64/kernel/kgdb.c | 2 +- arch/sw_64/kernel/process.c | 16 ++++++--- arch/sw_64/kernel/ptrace.c | 2 +- arch/sw_64/kernel/smp.c | 2 +- arch/sw_64/kernel/suspend.c | 2 ++ arch/sw_64/mm/fault.c | 10 +++--- arch/sw_64/mm/init.c | 13 +------ 19 files changed, 151 insertions(+), 95 deletions(-) create mode 100644 arch/sw_64/kernel/hmcall.c diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 30afc542039c..084a39ba649e 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -45,20 +45,19 @@ /* 0x80 - 0xBF : User Level HMC routine */ -#define HMC_bpt 0x80 -#define HMC_callsys 0x83 -#define HMC_imb 0x86 +#include + +/* Following will be deprecated from user level invocation */ #define HMC_rwreg 0x87 -#define HMC_rdunique 0x9E -#define HMC_wrunique 0x9F #define HMC_sz_uflush 0xA8 -#define HMC_gentrap 0xAA -#define HMC_wrperfmon 0xB0 #define HMC_longtime 0xB1 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include +extern void __init fixup_hmcall(void); + extern void halt(void) __attribute__((noreturn)); #define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) @@ -183,6 +182,7 @@ __CALL_HMC_W1(wrtimer, unsigned long); __CALL_HMC_RW3(tbivpn, unsigned long, unsigned long, unsigned long, unsigned long); __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); +__CALL_HMC_W1(wrtp, 
unsigned long); /* * TB routines.. */ diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index 3e75f34895bf..10199db1d637 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -73,8 +73,7 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, struct task_struct *next) { /* Check if our ASN is of an older version, and thus invalid. */ - unsigned long asn; - unsigned long mmc; + unsigned long asn, mmc, ptbr; long cpu = smp_processor_id(); #ifdef CONFIG_SMP @@ -94,17 +93,13 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, #endif /* - * Always update the PCB ASN. Another thread may have allocated - * a new mm->context (via flush_tlb_mm) without the ASN serial + * Update CSR:UPN and CSR:PTBR. Another thread may have allocated + * a new mm->context[asid] (via flush_tlb_mm) without the ASN serial * number wrapping. We have no way to detect when this is needed. */ - task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; - /* - * Always update the PCB PTBR. If next is kernel thread, it must - * update PTBR. If next is user process, it's ok to update PTBR. 
- */ - task_thread_info(next)->pcb.ptbr = virt_to_pfn(next_mm->pgd); - load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr); + asn = mmc & HARDWARE_ASN_MASK; + ptbr = virt_to_pfn(next_mm->pgd); + load_asn_ptbr(asn, ptbr); } extern void __load_new_mm_context(struct mm_struct *); @@ -141,8 +136,6 @@ static inline int init_new_context(struct task_struct *tsk, for_each_possible_cpu(i) mm->context.asid[i] = 0; - if (tsk != current) - task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); return 0; } @@ -154,7 +147,6 @@ static inline void destroy_context(struct mm_struct *mm) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { - task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); } static inline int arch_dup_mmap(struct mm_struct *oldmm, diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h index d503fc59390f..967fe1d680da 100644 --- a/arch/sw_64/include/asm/switch_to.h +++ b/arch/sw_64/include/asm/switch_to.h @@ -6,27 +6,39 @@ extern void __fpstate_save(struct task_struct *save_to); extern void __fpstate_restore(struct task_struct *restore_from); -extern struct task_struct *__switch_to(unsigned long pcb, - struct task_struct *prev, struct task_struct *next); +extern struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); extern void restore_da_match_after_sched(void); -static inline void fpstate_save(struct task_struct *task) +static inline void aux_save(struct task_struct *task) { - if (likely(!(task->flags & PF_KTHREAD))) + struct pcb_struct *pcb; + + if (likely(!(task->flags & PF_KTHREAD))) { + pcb = &task_thread_info(task)->pcb; + pcb->usp = rdusp(); + pcb->tp = rtid(); __fpstate_save(task); + } } -static inline void fpstate_restore(struct task_struct *task) +static inline void aux_restore(struct task_struct *task) { - if (likely(!(task->flags & PF_KTHREAD))) + struct pcb_struct *pcb; + + if (likely(!(task->flags & PF_KTHREAD))) { + pcb = 
&task_thread_info(task)->pcb; + wrusp(pcb->usp); + wrtp(pcb->tp); __fpstate_restore(task); + } } static inline void __switch_to_aux(struct task_struct *prev, struct task_struct *next) { - fpstate_save(prev); - fpstate_restore(next); + aux_save(prev); + aux_restore(next); } @@ -34,9 +46,8 @@ static inline void __switch_to_aux(struct task_struct *prev, do { \ struct task_struct *__prev = (prev); \ struct task_struct *__next = (next); \ - __u64 __nextpcb = virt_to_phys(&task_thread_info(__next)->pcb); \ __switch_to_aux(__prev, __next); \ - (last) = __switch_to(__nextpcb, __prev, __next); \ + (last) = __switch_to(__prev, __next); \ check_mmu_context(); \ } while (0) diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h index 33b95f815448..8e4e1f372d73 100644 --- a/arch/sw_64/include/asm/thread_info.h +++ b/arch/sw_64/include/asm/thread_info.h @@ -20,7 +20,7 @@ struct pcb_struct { unsigned long ptbr; unsigned int pcc; unsigned int asn; - unsigned long unique; + unsigned long tp; unsigned long flags; unsigned long da_match, da_mask; unsigned long dv_match, dv_mask; @@ -47,6 +47,14 @@ struct thread_info { #endif }; +static __always_inline u64 rtid(void) +{ + u64 val; + + asm volatile("rtid %0" : "=r" (val) : :); + return val; +} + /* * Macros/functions for gaining access to the thread information structure. 
*/ diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index 5b3fe80aed1b..476c396c5aa4 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -32,7 +32,7 @@ struct vcpucb { unsigned long vcpu_irq_disabled; unsigned long vcpu_irq; unsigned long ptbr; - unsigned long int_stat0; + unsigned long tid; unsigned long int_stat1; unsigned long int_stat2; unsigned long int_stat3; diff --git a/arch/sw_64/include/uapi/asm/hmcall.h b/arch/sw_64/include/uapi/asm/hmcall.h index f10378ba99c8..dcff778e1616 100644 --- a/arch/sw_64/include/uapi/asm/hmcall.h +++ b/arch/sw_64/include/uapi/asm/hmcall.h @@ -7,8 +7,10 @@ #define HMC_bpt 0x80 #define HMC_callsys 0x83 #define HMC_imb 0x86 -#define HMC_rdunique 0x9E -#define HMC_wrunique 0x9F +#define HMC_rdtp 0x9E +#define HMC_wrtp 0x9F +#define HMC_rdunique HMC_rdtp +#define HMC_wrunique HMC_wrtp #define HMC_gentrap 0xAA #define HMC_wrperfmon 0xB0 diff --git a/arch/sw_64/include/uapi/asm/ptrace.h b/arch/sw_64/include/uapi/asm/ptrace.h index 80bad067fc15..5cf3ca1d3dd8 100644 --- a/arch/sw_64/include/uapi/asm/ptrace.h +++ b/arch/sw_64/include/uapi/asm/ptrace.h @@ -36,7 +36,8 @@ struct user_fpsimd_state { #define FPREG_END 62 #define FPCR 63 #define PC 64 -#define UNIQUE 65 +#define TP 65 +#define UNIQUE TP #define VECREG_BASE 67 #define VECREG_END 161 #define F31_V1 98 diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 8cc09dd8fdbc..02facabae2d9 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -17,7 +17,7 @@ obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \ systbls.o dup_print.o tc.o timer.o \ insn.o early_init.o topology.o cacheinfo.o \ - vdso.o vdso/ + vdso.o vdso/ hmcall.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/sw_64/kernel/early_init.c b/arch/sw_64/kernel/early_init.c index 392627bef8bb..2f38719cc216 100644 --- 
a/arch/sw_64/kernel/early_init.c +++ b/arch/sw_64/kernel/early_init.c @@ -23,6 +23,7 @@ static void __init sw64_setup_platform_ops(void) asmlinkage __visible void __init sw64_start_kernel(void) { + fixup_hmcall(); sw64_setup_chip_ops(); sw64_setup_platform_ops(); sw64_platform->ops_fixup(); diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index f79c9a6ddf36..01896128ed23 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -384,11 +384,10 @@ $syscall_trace_failed: * Integer register context switch * The callee-saved registers must be saved and restored. * - * a0: physical address of next task's pcb, used by hmcode - * a1: previous task_struct (must be preserved across the switch) - * a2: next task_struct + * a0: previous task_struct (must be preserved across the switch) + * a1: next task_struct * - * The value of a1 must be preserved by this function, as that's how + * The value of a0 must be preserved by this function, as that's how * arguments are passed to schedule_tail. 
*/ .align 4 @@ -397,33 +396,28 @@ $syscall_trace_failed: __switch_to: .prologue 0 /* Save context into prev->thread */ - stl $26, TASK_THREAD_RA($17) - stl $30, TASK_THREAD_SP($17) - stl $9, TASK_THREAD_S0($17) - stl $10, TASK_THREAD_S1($17) - stl $11, TASK_THREAD_S2($17) - stl $12, TASK_THREAD_S3($17) - stl $13, TASK_THREAD_S4($17) - stl $14, TASK_THREAD_S5($17) - stl $15, TASK_THREAD_S6($17) + stl $26, TASK_THREAD_RA($16) + stl $30, TASK_THREAD_SP($16) + stl $9, TASK_THREAD_S0($16) + stl $10, TASK_THREAD_S1($16) + stl $11, TASK_THREAD_S2($16) + stl $12, TASK_THREAD_S3($16) + stl $13, TASK_THREAD_S4($16) + stl $14, TASK_THREAD_S5($16) + stl $15, TASK_THREAD_S6($16) /* Restore context from next->thread */ - ldl $26, TASK_THREAD_RA($18) - ldl $9, TASK_THREAD_S0($18) - ldl $10, TASK_THREAD_S1($18) - ldl $11, TASK_THREAD_S2($18) - ldl $12, TASK_THREAD_S3($18) - ldl $13, TASK_THREAD_S4($18) - ldl $14, TASK_THREAD_S5($18) - ldl $15, TASK_THREAD_S6($18) - sys_call HMC_swpctx - /* - * SP has been saved and restored by HMC_swpctx, - * and restore it again here for future expansion. 
- */ - ldl $30, TASK_THREAD_SP($18) + ldl $26, TASK_THREAD_RA($17) + ldl $30, TASK_THREAD_SP($17) + ldl $9, TASK_THREAD_S0($17) + ldl $10, TASK_THREAD_S1($17) + ldl $11, TASK_THREAD_S2($17) + ldl $12, TASK_THREAD_S3($17) + ldl $13, TASK_THREAD_S4($17) + ldl $14, TASK_THREAD_S5($17) + ldl $15, TASK_THREAD_S6($17) ldi $8, 0x3fff bic $sp, $8, $8 - mov $17, $0 + mov $16, $0 ret .end __switch_to @@ -436,7 +430,6 @@ __switch_to: .ent ret_from_fork ret_from_fork: ldi $26, ret_from_sys_call - mov $17, $16 jmp $31, schedule_tail .end ret_from_fork @@ -447,7 +440,6 @@ ret_from_fork: .globl ret_from_kernel_thread .ent ret_from_kernel_thread ret_from_kernel_thread: - mov $17, $16 call $26, schedule_tail mov $9, $27 mov $10, $16 diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c index 799706db5b94..0e7e860c507e 100644 --- a/arch/sw_64/kernel/hibernate.c +++ b/arch/sw_64/kernel/hibernate.c @@ -14,7 +14,7 @@ void save_processor_state(void) vcb->ksp = rdksp(); vcb->usp = rdusp(); - vcb->pcbb = rdpcbb(); + vcb->tid = rtid(); vcb->ptbr = rdptbr(); } @@ -24,7 +24,7 @@ void restore_processor_state(void) wrksp(vcb->ksp); wrusp(vcb->usp); - wrpcbb(vcb->pcbb); + wrtp(vcb->tid); wrptbr(vcb->ptbr); sflush(); tbiv(); diff --git a/arch/sw_64/kernel/hmcall.c b/arch/sw_64/kernel/hmcall.c new file mode 100644 index 000000000000..b81d7fff1c34 --- /dev/null +++ b/arch/sw_64/kernel/hmcall.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/hmcall.c + * + * Copyright (C) 2022 WXIAT + * Author: He Sheng + */ + +#include +#include + +#define A0(func) (((HMC_##func & 0xFF) >> 6) & 0x1) +#define A1(func) ((((HMC_##func & 0xFF)>>6) & 0x2) >> 1) +#define A2(func) ((HMC_##func & 0x3F) << 7) + +#define T(func) ((A0(func) ^ A1(func)) & 0x1) +#define B0(func) ((T(func) | A0(func)) << 13) +#define B1(func) (((~T(func) & 1) | A1(func)) << 14) + +#define PRI_BASE 0x10000UL + +#define HMCALL_ENTRY(func) (PRI_BASE | B1(func) | B0(func) | A2(func)) + + +static 
inline void fixup_rdtp(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(rdtp)); + + entry[0] = 0x181ffec7; /* pri_rcsr $0, CSR__TID */ + entry[1] = 0x1ee00000; /* pri_ret $23 */ +} + +static inline void fixup_wrtp(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(wrtp)); + + entry[0] = 0x1a1fffc7; /* pri_wcsr $16, CSR__TID */ + entry[1] = 0x1ee00000; /* pri_ret $23 */ +} + +void __init fixup_hmcall(void) +{ +#if defined(CONFIG_SUBARCH_C3A) || defined(CONFIG_SUBARCH_C3B) + fixup_rdtp(); + fixup_wrtp(); +#endif +} + +#undef A0 +#undef A1 +#undef A2 +#undef T +#undef B0 +#undef B1 diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c index ac2f397f1609..95970b293de0 100644 --- a/arch/sw_64/kernel/kgdb.c +++ b/arch/sw_64/kernel/kgdb.c @@ -95,7 +95,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "pc", 8, offsetof(struct pt_regs, pc)}, { "", 8, -1 }, - { "unique", 8, -1}, + { "tp", 8, -1}, }; char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index f5525a194072..31e95e722e81 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -125,7 +125,7 @@ flush_thread(void) wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); /* Clean slate for TLS. */ - current_thread_info()->pcb.unique = 0; + current_thread_info()->pcb.tp = 0; } void @@ -135,7 +135,11 @@ release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpstate_save(src); + /* + * aux_save() has to read the current TLS pointer from CSR:TID as it + * may be out-of-sync with the saved value. 
+ */ + aux_save(src); *dst = *src; return 0; } @@ -168,6 +172,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, childti->pcb.usp = 0; return 0; } + /* * Note: if CLONE_SETTLS is not set, then we must inherit the * value from the parent, which will have been set by the block @@ -176,10 +181,11 @@ copy_thread(unsigned long clone_flags, unsigned long usp, * application calling fork. */ if (clone_flags & CLONE_SETTLS) - childti->pcb.unique = tls; + childti->pcb.tp = regs->r20; else regs->r20 = 0; - childti->pcb.usp = usp ?: rdusp(); + if (usp) + childti->pcb.usp = usp; *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; @@ -202,7 +208,7 @@ void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs) dest[i] = *(__u64 *)((void *)regs + regoffsets[i]); dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; dest[31] = regs->pc; - dest[32] = ti->pcb.unique; + dest[32] = ti->pcb.tp; } EXPORT_SYMBOL(sw64_elf_core_copy_regs); diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index 064296711b2f..51826cdbe9ef 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -72,7 +72,7 @@ short regoffsets[32] = { static int pcboff[] = { [USP] = PCB_OFF(usp), - [UNIQUE] = PCB_OFF(unique), + [TP] = PCB_OFF(tp), [DA_MATCH] = PCB_OFF(da_match), [DA_MASK] = PCB_OFF(da_mask), [DV_MATCH] = PCB_OFF(dv_match), diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 43d803c49545..8f752c604db0 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -111,7 +111,7 @@ void smp_callin(void) mmgrab(&init_mm); current->active_mm = &init_mm; /* update csr:ptbr */ - wrptbr(PFN_PHYS(current_thread_info()->pcb.ptbr)); + wrptbr(virt_to_phys(init_mm.pgd)); /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c index 369bc1e19b85..994d8e245878 100644 --- a/arch/sw_64/kernel/suspend.c +++ b/arch/sw_64/kernel/suspend.c @@ 
-33,6 +33,7 @@ void sw64_suspend_enter(void) */ disable_local_timer(); + current_thread_info()->pcb.tp = rtid(); #ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE sw64_suspend_deep_sleep(&suspend_state); @@ -40,6 +41,7 @@ void sw64_suspend_enter(void) mtinten(); asm("halt"); #endif + wrtp(current_thread_info()->pcb.tp); disable_local_timer(); } diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index b7fc2c816698..126752771b11 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -66,17 +66,15 @@ void show_all_vma(void) */ void __load_new_mm_context(struct mm_struct *next_mm) { - unsigned long mmc; - struct pcb_struct *pcb; + unsigned long mmc, asn, ptbr; mmc = __get_new_mm_context(next_mm, smp_processor_id()); next_mm->context.asid[smp_processor_id()] = mmc; - pcb = ¤t_thread_info()->pcb; - pcb->asn = mmc & HARDWARE_ASN_MASK; - pcb->ptbr = virt_to_pfn(next_mm->pgd); + asn = mmc & HARDWARE_ASN_MASK; + ptbr = virt_to_pfn(next_mm->pgd); - load_asn_ptbr(pcb->asn, pcb->ptbr); + load_asn_ptbr(asn, ptbr); } /* diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index ef8e45ece0ff..93ec3ecdf4f1 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -82,19 +82,8 @@ pgd_alloc(struct mm_struct *mm) static inline void switch_to_system_map(void) { - unsigned long newptbr; - - /* - * Initialize the kernel's page tables. Linux puts the vptb in - * the last slot of the L1 page table. - */ memset(swapper_pg_dir, 0, PAGE_SIZE); - newptbr = virt_to_pfn(swapper_pg_dir); - - /* Also set up the real kernel PCB while we're at it. 
*/ - init_thread_info.pcb.ptbr = newptbr; - init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ - wrptbr(PFN_PHYS(newptbr)); + wrptbr(virt_to_phys(swapper_pg_dir)); tbiv(); } -- Gitee From 50819653156c69d322a104a254767cf7623aa15b Mon Sep 17 00:00:00 2001 From: He Sheng Date: Wed, 27 Jul 2022 09:16:21 +0800 Subject: [PATCH 21/77] sw64: remove unused members from pcb_struct Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Since we have removed hmcall swpctx invocations, some members of struct pcb_struct become useless. This patch removes them to reduce the struct size. As a result, struct processor_state is simplified too. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/suspend.h | 2 +- arch/sw_64/include/asm/thread_info.h | 5 ----- arch/sw_64/kernel/asm-offsets.c | 3 +-- arch/sw_64/kernel/hibernate_asm.S | 6 ++---- arch/sw_64/kernel/process.c | 1 - arch/sw_64/kernel/traps.c | 1 - 6 files changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h index 83fd413fd6e2..de6d97a0aff6 100644 --- a/arch/sw_64/include/asm/suspend.h +++ b/arch/sw_64/include/asm/suspend.h @@ -39,7 +39,7 @@ struct processor_state { struct callee_saved_fpregs fpregs; unsigned long fpcr; #ifdef CONFIG_HIBERNATION - struct pcb_struct pcb; + unsigned long sp; struct vcpucb vcb; #endif }; diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h index 8e4e1f372d73..7cdafaec62e4 100644 --- a/arch/sw_64/include/asm/thread_info.h +++ b/arch/sw_64/include/asm/thread_info.h @@ -15,13 +15,8 @@ typedef struct { struct pcb_struct { - unsigned long ksp; unsigned long usp; - unsigned long ptbr; - unsigned int pcc; - unsigned int asn; unsigned long tp; - unsigned long flags; unsigned long da_match, da_mask; unsigned long dv_match, dv_mask; unsigned long dc_ctl; diff --git 
a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 9e6c338a5edd..56c5daaa413c 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -33,9 +33,8 @@ void foo(void) OFFSET(PSTATE_FPREGS, processor_state, fpregs); OFFSET(PSTATE_FPCR, processor_state, fpcr); #ifdef CONFIG_HIBERNATION - OFFSET(PSTATE_PCB, processor_state, pcb); + OFFSET(PSTATE_SP, processor_state, sp); #endif - OFFSET(PCB_KSP, pcb_struct, ksp); OFFSET(PBE_ADDR, pbe, address); OFFSET(PBE_ORIG_ADDR, pbe, orig_address); OFFSET(PBE_NEXT, pbe, next); diff --git a/arch/sw_64/kernel/hibernate_asm.S b/arch/sw_64/kernel/hibernate_asm.S index 3acbcdbae0b3..23bab0d6edd8 100644 --- a/arch/sw_64/kernel/hibernate_asm.S +++ b/arch/sw_64/kernel/hibernate_asm.S @@ -30,8 +30,7 @@ ENTRY(swsusp_arch_suspend) rfpcr $f0 fstd $f0, PSTATE_FPCR($16) - ldi $1, PSTATE_PCB($16) - stl sp, PCB_KSP($1) + stl sp, PSTATE_SP($16) call swsusp_save ldi $16, hibernate_state ldi $1, PSTATE_REGS($16) @@ -112,8 +111,7 @@ $hibernate_setfpec_over: vldd $f8, CALLEE_F8($1) vldd $f9, CALLEE_F9($1) - ldi $1, PSTATE_PCB($16) - ldl sp, PCB_KSP($1) + ldl sp, PSTATE_SP($16) ldi $8, 0x3fff bic sp, $8, $8 diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index 31e95e722e81..e1689d25f77d 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -160,7 +160,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs = current_pt_regs(); - childti->pcb.flags = 7; /* set FEN, clear everything else */ p->thread.sp = (unsigned long) childregs; if (unlikely(p->flags & PF_KTHREAD)) { diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index ff32330d73ab..a54db1d7045b 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -283,7 +283,6 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) * attacks. So turn the bleeding FPU back on and be done * with it. 
*/ - current_thread_info()->pcb.flags |= 1; fpu_enable(); return; -- Gitee From 87368f61a121d799bb9236372ba57840f8f5078e Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Fri, 5 Aug 2022 08:52:26 +0800 Subject: [PATCH 22/77] sw64: fix sys_rt_sigaction Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56XYC -------------------------------- __ARCH_HAS_SA_RESTORER was defined to fix compile error in the past. However, this changed the offset of sa_mask in struct sigaction and made sys_rt_sigaction unable to get sa_mask passed from user. To fix this problem, the old sigaction and the related structs are added back, macro define __ARCH_HAS_SA_RESTORER is removed. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 - arch/sw_64/include/asm/signal.h | 8 ++++--- arch/sw_64/kernel/signal.c | 30 ++++++++++++++++++++++++++ arch/sw_64/kernel/syscalls/syscall.tbl | 2 +- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index deefaf312628..392f7806afcb 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -23,7 +23,6 @@ config SW64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER - select OLD_SIGACTION select OLD_SIGSUSPEND select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/sw_64/include/asm/signal.h b/arch/sw_64/include/asm/signal.h index 3e91b72c0b0a..0d846c1aa571 100644 --- a/arch/sw_64/include/asm/signal.h +++ b/arch/sw_64/include/asm/signal.h @@ -14,9 +14,11 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#ifdef CONFIG_OLD_SIGACTION -#define __ARCH_HAS_SA_RESTORER -#endif +struct odd_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + int sa_flags; +}; #include #endif diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index 6a6203ccb04f..a1edc5300742 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -38,6 
+38,36 @@ SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask) return res; } +SYSCALL_DEFINE3(odd_sigaction, int, sig, + const struct odd_sigaction __user *, act, + struct odd_sigaction __user *, oact) +{ + struct k_sigaction new_ka, old_ka; + old_sigset_t mask; + int ret; + + if (act) { + if (!access_ok(act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + /* * Do a signal return; undo the signal stack. */ diff --git a/arch/sw_64/kernel/syscalls/syscall.tbl b/arch/sw_64/kernel/syscalls/syscall.tbl index 42a179422b6b..35d108b49a61 100644 --- a/arch/sw_64/kernel/syscalls/syscall.tbl +++ b/arch/sw_64/kernel/syscalls/syscall.tbl @@ -163,7 +163,7 @@ #153 is unused #154 is unused #155 is unused -156 common sigaction sys_sigaction +156 common sigaction sys_odd_sigaction #157 is unused #158 is unused #159 is unused -- Gitee From d6ff11854ef463b4230f8e44072a3efbd674ca7c Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 8 Aug 2022 10:04:28 +0800 Subject: [PATCH 23/77] sw64: fix head loop in deep-copy_template.S Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- Fix conditional branch of head loop in deep-copy_template.S, so it will go to simd loop properly when dst is 32 bytes aligned. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/deep-copy_template.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S index 8355ecf8a905..f80be2bef44c 100644 --- a/arch/sw_64/lib/deep-copy_template.S +++ b/arch/sw_64/lib/deep-copy_template.S @@ -83,9 +83,9 @@ $quad_loop_head: addl $17, 8, $17 FIXUP_LDST( stl $2, 0($16) ) addl $16, 8, $16 - and $16, 31, $1 blt $18, $simd_end - beq $16, $dest_aligned_32 + and $16, 31, $1 + beq $1, $dest_aligned_32 br $31, $quad_loop_head $dest_aligned_32: @@ -191,7 +191,8 @@ $quad_u_loop_head: FIXUP_LDST( stl $2, 0($16) ) addl $16, 8, $16 blt $18, $simd_end - beq $16, $dest_aligned_32 + and $16, 31, $1 + beq $1, $dest_aligned_32 br $31, $quad_u_loop_head $prep_simd_u_loop: -- Gitee From 7276d131f1341be0c4b1fe43dbbdaf972e2b5aac Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 8 Aug 2022 10:04:29 +0800 Subject: [PATCH 24/77] sw64: adjust instructions order of deep-copy_template.S Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- Adjust order of instructions in deep-copy_template.S to make sure $18 always has bytes left to copy. This makes sure the return value of copy_{to,from}_user() is correct. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/deep-copy_template.S | 158 ++++++++++++++-------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S index f80be2bef44c..aa6ab8e29f05 100644 --- a/arch/sw_64/lib/deep-copy_template.S +++ b/arch/sw_64/lib/deep-copy_template.S @@ -3,9 +3,11 @@ /* * template for memcpy and copy_user with SIMD * - * $16: current store address - * $17: current load address - * $18: current bytes left to copy + * $4: 8-byte misalignment of src when dest is 8-byte aligned + * $5: 32-byte misalignment of src when dest is 32-byte aligned + * $16: latest dest, clobbered + * $17: latest src, clobbered + * $18: bytes left to copy * */ @@ -26,17 +28,14 @@ ldi $sp, 0x60($sp) #define SAVE_SIMD_U_REGS \ - ldi $sp, -0x120($sp); \ + ldi $sp, -0xc0($sp); \ addl $sp, 0x1f, $23; \ bic $23, 0x1f, $23; \ vstd $f1, 0($23); \ vstd $f2, 0x20($23); \ vstd $f4, 0x40($23); \ vstd $f5, 0x60($23); \ - vstd $f10, 0x80($23); \ - vstd $f11, 0xa0($23); \ - vstd $f20, 0xc0($23); \ - vstd $f21, 0xe0($23) + vstd $f3, 0x80($23) #define RESTORE_SIMD_U_REGS \ addl $sp, 0x1f, $23; \ @@ -45,22 +44,19 @@ vldd $f2, 0x20($23); \ vldd $f4, 0x40($23); \ vldd $f5, 0x60($23); \ - vldd $f10, 0x80($23); \ - vldd $f11, 0xa0($23); \ - vldd $f20, 0xc0($23); \ - vldd $f21, 0xe0($23); \ - ldi $sp, 0x120($sp) + vldd $f3, 0x80($23); \ + ldi $sp, 0xc0($sp) ble $18, $out and $16, 7, $1 beq $1, $dest_aligned_8 - .align 4 + .align 3 $byte_loop_head: FIXUP_LDST( ldbu $2, 0($17) ) + FIXUP_LDST( stb $2, 0($16) ) subl $18, 1, $18 addl $17, 1, $17 - FIXUP_LDST( stb $2, 0($16) ) addl $16, 1, $16 ble $18, $out and $16, 7, $1 @@ -68,27 +64,28 @@ $byte_loop_head: $dest_aligned_8: and $17, 7, $4 - subl $18, 16, $18 - blt $18, $quad_end - subl $18, 64, $18 - blt $18, $simd_end + cmplt $18, 16, $1 + bne $1, $quad_loop_end and $16, 31, $1 beq $1, $dest_aligned_32 + cmplt $18, 64, $1 + bne $1, 
$simd_end bne $4, $quad_u_loop_head - .align 5 + .align 3 $quad_loop_head: FIXUP_LDST( ldl $2, 0($17) ) - subl $18, 8, $18 - addl $17, 8, $17 FIXUP_LDST( stl $2, 0($16) ) addl $16, 8, $16 - blt $18, $simd_end + addl $17, 8, $17 + subl $18, 8, $18 and $16, 31, $1 beq $1, $dest_aligned_32 br $31, $quad_loop_head $dest_aligned_32: + cmplt $18, 64, $1 + bne $1, $simd_end and $17, 31, $5 bne $5, $prep_simd_u_loop @@ -98,63 +95,65 @@ $prep_simd_loop: cmple $18, $1, $1 bne $1, $simd_loop - .align 5 + .align 4 $simd_loop_nc: fillcs 128 * 5($17) FIXUP_LDST( vldd $f1, 0($17) ) FIXUP_LDST( vldd $f2, 32($17) ) - subl $18, 64, $18 - addl $17, 64, $17 FIXUP_LDST( vstd_nc $f1, 0($16) ) FIXUP_LDST( vstd_nc $f2, 32($16) ) + subl $18, 64, $18 + addl $17, 64, $17 addl $16, 64, $16 - bge $18, $simd_loop_nc + cmplt $18, 64, $1 + beq $1, $simd_loop_nc memb # required for _nc store instructions br $31, $simd_loop_end - .align 5 + .align 4 $simd_loop: fillcs 128 * 5($17) FIXUP_LDST( vldd $f1, 0($17) ) FIXUP_LDST( vldd $f2, 32($17) ) - subl $18, 64, $18 - addl $17, 64, $17 FIXUP_LDST( vstd $f1, 0($16) ) FIXUP_LDST( vstd $f2, 32($16) ) + subl $18, 64, $18 + addl $17, 64, $17 addl $16, 64, $16 - bge $18, $simd_loop + cmplt $18, 64, $1 + beq $1, $simd_loop $simd_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 + cmplt $18, 32, $1 bne $1, $no_more_simd FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vstd $f1, 0($16) ) subl $18, 32, $18 addl $17, 32, $17 - FIXUP_LDST( vstd $f1, 0($16) ) addl $16, 32, $16 $no_more_simd: RESTORE_SIMD_REGS $simd_end: - addl $18, 64, $18 - blt $18, $quad_end + ble $18, $out + cmplt $18, 16, $1 + bne $1, $quad_loop_end bne $4, $prep_quad_u_loop_tail .align 4 $quad_loop_tail: FIXUP_LDST( ldl $2, 0($17) ) FIXUP_LDST( ldl $3, 8($17) ) - subl $18, 16, $18 - addl $17, 16, $17 FIXUP_LDST( stl $2, 0($16) ) FIXUP_LDST( stl $3, 8($16) ) + subl $18, 16, $18 + addl $17, 16, $17 addl $16, 16, $16 - bge $18, $quad_loop_tail + cmplt $18, 16, $1 + beq $1, $quad_loop_tail -$quad_end: - 
addl $18, 16, $18 +$quad_loop_end: ble $18, $out cmplt $18, 8, $1 bne $1, $byte_loop_tail @@ -162,35 +161,34 @@ $quad_end: $move_one_quad: FIXUP_LDST( ldl $2, 0($17) ) + FIXUP_LDST( stl $2, 0($16) ) subl $18, 8, $18 addl $17, 8, $17 - FIXUP_LDST( stl $2, 0($16) ) addl $16, 8, $16 ble $18, $out - .align 4 + .align 3 $byte_loop_tail: FIXUP_LDST( ldbu $2, 0($17) ) + FIXUP_LDST( stb $2, 0($16) ) subl $18, 1, $18 addl $17, 1, $17 - FIXUP_LDST( stb $2, 0($16) ) addl $16, 1, $16 bgt $18, $byte_loop_tail br $31, $out /* misaligned src and dst */ - .align 5 + .align 4 $quad_u_loop_head: FIXUP_LDST( ldl_u $2, 0($17) ) FIXUP_LDST( ldl_u $3, 7($17) ) - subl $18, 8, $18 - addl $17, 8, $17 extll $2, $4, $2 exthl $3, $4, $3 bis $2, $3, $2 FIXUP_LDST( stl $2, 0($16) ) addl $16, 8, $16 - blt $18, $simd_end + addl $17, 8, $17 + subl $18, 8, $18 and $16, 31, $1 beq $1, $dest_aligned_32 br $31, $quad_u_loop_head @@ -210,53 +208,54 @@ $prep_simd_u_loop: cmple $18, $1, $1 bne $1, $simd_u_loop - .align 5 + .align 4 $simd_u_loop_nc: FIXUP_LDST( vldd $f5, 32($3) ) fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f3, $f4, $f31, $f3 + FIXUP_LDST( vstd_nc $f3, 0($16) ) FIXUP_LDST( vldd $f4, 64($3) ) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - FIXUP_LDST( vstd_nc $f10, 0($16) ) - FIXUP_LDST( vstd_nc $f20, 32($16) ) + srlow $f5, $f1, $f5 + sllow $f4, $f2, $f3 + vlogfc $f5, $f3, $f31, $f5 + FIXUP_LDST( vstd_nc $f5, 32($16) ) subl $18, 64, $18 addl $3, 64, $3 addl $16, 64, $16 - bge $18, $simd_u_loop_nc + cmplt $18, 64, $1 + beq $1, $simd_u_loop_nc memb # required for _nc store instructions br $31, $simd_u_loop_end - .align 5 + .align 4 $simd_u_loop: FIXUP_LDST( vldd $f5, 32($3) ) fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f4, $f3, $f31, $f3 + FIXUP_LDST( 
vstd $f3, 0($16) ) FIXUP_LDST( vldd $f4, 64($3) ) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - FIXUP_LDST( vstd $f10, 0($16) ) - FIXUP_LDST( vstd $f20, 32($16) ) + srlow $f5, $f1, $f5 + sllow $f4, $f2, $f3 + vlogfc $f5, $f3, $f31, $f3 + FIXUP_LDST( vstd $f3, 32($16) ) subl $18, 64, $18 addl $3, 64, $3 addl $16, 64, $16 - bge $18, $simd_u_loop + cmplt $18, 64, $1 + beq $1, $simd_u_loop $simd_u_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 + cmplt $18, 32, $1 bne $1, $no_more_simd_u FIXUP_LDST( vldd $f5, 32($3) ) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - FIXUP_LDST( vstd $f10, 0($16) ) + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f4, $f3, $f31, $f3 + FIXUP_LDST( vstd $f3, 0($16) ) subl $18, 32, $18 addl $3, 32, $3 addl $16, 32, $16 @@ -268,7 +267,7 @@ $no_more_simd_u: $prep_quad_u_loop_tail: FIXUP_LDST( ldl_u $2, 0($17) ) - .align 5 + .align 4 $quad_u_loop_tail: FIXUP_LDST( ldl_u $3, 8($17) ) extll $2, $4, $22 @@ -283,18 +282,19 @@ $quad_u_loop_tail: subl $18, 16, $18 addl $17, 16, $17 addl $16, 16, $16 - bge $18, $quad_u_loop_tail - br $31, $quad_end + cmplt $18, 16, $1 + beq $1, $quad_u_loop_tail + br $31, $quad_loop_end $move_one_quad_u: FIXUP_LDST( ldl_u $2, 0($17) ) FIXUP_LDST( ldl_u $3, 8($17) ) - subl $18, 8, $18 - addl $17, 8, $17 extll $2, $4, $22 exthl $3, $4, $23 bis $22, $23, $22 FIXUP_LDST( stl $22, 0($16) ) + subl $18, 8, $18 + addl $17, 8, $17 addl $16, 8, $16 ble $18, $out br $31, $byte_loop_tail -- Gitee From 0fda5305925abe7dfb3220378085b2babc8bda3e Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 8 Aug 2022 10:04:30 +0800 Subject: [PATCH 25/77] sw64: fix exception handling of deep-copy_user.S Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- If exception happened inside simd part causing the function to exit, pop stack to make sure everything works correctly. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/deep-copy_template.S | 16 ++++++++++++---- arch/sw_64/lib/deep-copy_user.S | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S index aa6ab8e29f05..f2dbc8c50208 100644 --- a/arch/sw_64/lib/deep-copy_template.S +++ b/arch/sw_64/lib/deep-copy_template.S @@ -5,6 +5,10 @@ * * $4: 8-byte misalignment of src when dest is 8-byte aligned * $5: 32-byte misalignment of src when dest is 32-byte aligned + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop * $16: latest dest, clobbered * $17: latest src, clobbered * $18: bytes left to copy @@ -18,14 +22,16 @@ addl $sp, 0x1f, $23; \ bic $23, 0x1f, $23; \ vstd $f1, 0($23); \ - vstd $f2, 0x20($23) + vstd $f2, 0x20($23); \ + ldi $7, 1 #define RESTORE_SIMD_REGS \ addl $sp, 0x1f, $23; \ bic $23, 0x1f, $23; \ vldd $f1, 0($23); \ vldd $f2, 0x20($23); \ - ldi $sp, 0x60($sp) + ldi $sp, 0x60($sp); \ + bis $31, $31, $7 #define SAVE_SIMD_U_REGS \ ldi $sp, -0xc0($sp); \ @@ -35,7 +41,8 @@ vstd $f2, 0x20($23); \ vstd $f4, 0x40($23); \ vstd $f5, 0x60($23); \ - vstd $f3, 0x80($23) + vstd $f3, 0x80($23); \ + ldi $7, 2 #define RESTORE_SIMD_U_REGS \ addl $sp, 0x1f, $23; \ @@ -45,7 +52,8 @@ vldd $f4, 0x40($23); \ vldd $f5, 0x60($23); \ vldd $f3, 0x80($23); \ - ldi $sp, 0xc0($sp) + ldi $sp, 0xc0($sp); \ + bis $31, $31, $7 ble $18, $out and $16, 7, $1 diff --git a/arch/sw_64/lib/deep-copy_user.S b/arch/sw_64/lib/deep-copy_user.S index 145e1cc6ba18..327cab322765 100644 --- a/arch/sw_64/lib/deep-copy_user.S +++ b/arch/sw_64/lib/deep-copy_user.S @@ -10,13 +10,34 @@ ldi $31, $out-99b($31); \ .previous +/* + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop + * $18: bytes left to copy + * + */ .globl __copy_user .ent __copy_user __copy_user: .prologue 0 + bis $31, $31, $7 #include "deep-copy_template.S" $out: bis 
$31, $18, $0 + beq $7, $return + subl $7, 1, $7 + beq $7, $restore_simd + +$restore_simd_u: + RESTORE_SIMD_U_REGS + br $31, $return + +$restore_simd: + RESTORE_SIMD_REGS + +$return: ret .end __copy_user EXPORT_SYMBOL(__copy_user) -- Gitee From ac1bd4fc3bfd75a23612acaee36a0e6e5d6bc2a9 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 8 Aug 2022 10:04:31 +0800 Subject: [PATCH 26/77] sw64: switch to inline _copy_{to,from}_user() Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- Switch to inline version of _copy_{to,from}_user() to improve performance. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/uaccess.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sw_64/include/asm/uaccess.h b/arch/sw_64/include/asm/uaccess.h index ceacfaa07cfb..730121aad184 100644 --- a/arch/sw_64/include/asm/uaccess.h +++ b/arch/sw_64/include/asm/uaccess.h @@ -292,6 +292,8 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long len) { return __copy_user((__force void *)to, from, len); } +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER extern long __clear_user(void __user *to, long len); -- Gitee From b79d2cf17b4ba10d7d991d785d7e6889af0d491d Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 8 Aug 2022 10:04:31 +0800 Subject: [PATCH 27/77] sw64: improve deep-copy_template.S Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PN9S -------------------------------- Cache fetch instructions are removed. These instructions will cause more cache misses and negatively impact performance. Some unnecessary code alignment are removed to reduce code size. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/lib/deep-copy_template.S | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S index f2dbc8c50208..7705eb3f36d4 100644 --- a/arch/sw_64/lib/deep-copy_template.S +++ b/arch/sw_64/lib/deep-copy_template.S @@ -59,7 +59,6 @@ and $16, 7, $1 beq $1, $dest_aligned_8 - .align 3 $byte_loop_head: FIXUP_LDST( ldbu $2, 0($17) ) FIXUP_LDST( stb $2, 0($16) ) @@ -80,7 +79,6 @@ $dest_aligned_8: bne $1, $simd_end bne $4, $quad_u_loop_head - .align 3 $quad_loop_head: FIXUP_LDST( ldl $2, 0($17) ) FIXUP_LDST( stl $2, 0($16) ) @@ -105,7 +103,6 @@ $prep_simd_loop: .align 4 $simd_loop_nc: - fillcs 128 * 5($17) FIXUP_LDST( vldd $f1, 0($17) ) FIXUP_LDST( vldd $f2, 32($17) ) FIXUP_LDST( vstd_nc $f1, 0($16) ) @@ -120,7 +117,6 @@ $simd_loop_nc: .align 4 $simd_loop: - fillcs 128 * 5($17) FIXUP_LDST( vldd $f1, 0($17) ) FIXUP_LDST( vldd $f2, 32($17) ) FIXUP_LDST( vstd $f1, 0($16) ) @@ -186,7 +182,6 @@ $byte_loop_tail: br $31, $out /* misaligned src and dst */ - .align 4 $quad_u_loop_head: FIXUP_LDST( ldl_u $2, 0($17) ) FIXUP_LDST( ldl_u $3, 7($17) ) @@ -219,7 +214,6 @@ $prep_simd_u_loop: .align 4 $simd_u_loop_nc: FIXUP_LDST( vldd $f5, 32($3) ) - fillcs 128 * 5($3) srlow $f4, $f1, $f4 sllow $f5, $f2, $f3 vlogfc $f3, $f4, $f31, $f3 @@ -240,7 +234,6 @@ $simd_u_loop_nc: .align 4 $simd_u_loop: FIXUP_LDST( vldd $f5, 32($3) ) - fillcs 128 * 5($3) srlow $f4, $f1, $f4 sllow $f5, $f2, $f3 vlogfc $f4, $f3, $f31, $f3 -- Gitee From f6fbb406203cd79624cbe3bbfe2ac2a62b8ecf91 Mon Sep 17 00:00:00 2001 From: Wang Yuanheng Date: Fri, 29 Jul 2022 17:18:53 +0800 Subject: [PATCH 28/77] sw64: do not set devint_wken for guest and emulator Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNB8 -------------------------------- It supports host mode only. Qemu reports "unsupported IPU addr" when it runs a guest os or emulator system. 
Signed-off-by: Wang Yuanheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c index fcc6e0f02a93..6cc872ba9ca5 100644 --- a/arch/sw_64/kernel/pci.c +++ b/arch/sw_64/kernel/pci.c @@ -614,7 +614,8 @@ void __init sw64_init_arch(void) cpu_num = sw64_chip->get_cpu_num(); for (node = 0; node < cpu_num; node++) { - set_devint_wken(node); + if (is_in_host()) + set_devint_wken(node); rc_enable = sw64_chip_init->pci_init.get_rc_enable(node); if (rc_enable == 0) { printk("PCIe is disabled on node %ld\n", node); -- Gitee From caf686c82b30587f8da327917f1fc110d2a7581a Mon Sep 17 00:00:00 2001 From: Tang Jinyang Date: Tue, 9 Aug 2022 17:07:01 +0800 Subject: [PATCH 29/77] sw64: fix CPUFreq bug Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNC3 -------------------------------- Dynamic frequency scaling cannot change the current cpu frequency when userspace policy is used, and cpuinfo_cur_freq always displays the default value. This patch fixes it. 
Signed-off-by: Tang Jinyang Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/clock.h | 4 ++-- arch/sw_64/kernel/clock.c | 17 ++++++++++++----- drivers/cpufreq/sw64_cpufreq.c | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h index 06ad4bcd6ad3..30983e8e7cc7 100644 --- a/arch/sw_64/include/asm/clock.h +++ b/arch/sw_64/include/asm/clock.h @@ -48,9 +48,9 @@ int sw64_set_rate(int index, unsigned long rate); struct clk *sw64_clk_get(struct device *dev, const char *id); -unsigned long sw64_clk_get_rate(struct clk *clk); - void sw64_update_clockevents(unsigned long cpu, u32 freq); void sw64_store_policy(struct cpufreq_policy *policy); + +unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy); #endif /* _ASM_SW64_CLOCK_H */ diff --git a/arch/sw_64/kernel/clock.c b/arch/sw_64/kernel/clock.c index f31f596a0052..aa22e9550e29 100644 --- a/arch/sw_64/kernel/clock.c +++ b/arch/sw_64/kernel/clock.c @@ -109,14 +109,21 @@ struct clk *sw64_clk_get(struct device *dev, const char *id) } EXPORT_SYMBOL(sw64_clk_get); -unsigned long sw64_clk_get_rate(struct clk *clk) +unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy) { - if (!clk) - return 0; + int i; + u64 val; - return (unsigned long)clk->rate; + val = sw64_io_read(0, CLK_CTL); + val = val >> CORE_PLL2_CFG_SHIFT; + + for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { + if (cpu_freq[val] == cpu_freq[i]) + return cpu_freq[i]; + } + return 0; } -EXPORT_SYMBOL(sw64_clk_get_rate); +EXPORT_SYMBOL(__sw64_cpufreq_get); void sw64_store_policy(struct cpufreq_policy *policy) { diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c index 5f49b5175d34..b8cd37dac0d9 100644 --- a/drivers/cpufreq/sw64_cpufreq.c +++ b/drivers/cpufreq/sw64_cpufreq.c @@ -59,7 +59,7 @@ static unsigned int sw64_cpufreq_get(unsigned int cpu) return 0; } - return sw64_clk_get_rate(policy->clk); + return __sw64_cpufreq_get(policy) * 1000; } /* -- 
Gitee From f491e691011fe9354de8ba9d829003ce225d843d Mon Sep 17 00:00:00 2001 From: He Sheng Date: Thu, 11 Aug 2022 13:56:08 +0800 Subject: [PATCH 30/77] sw64: clean up unused hmcall definitions Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- These hmcalls can be removed since nobody invoke them. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 084a39ba649e..e85397ab06a1 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -17,16 +17,12 @@ #define HMC_wrksp 0x0E #define HMC_mtinten 0x0F #define HMC_load_mm 0x11 -#define HMC_rdpcbb 0x12 -#define HMC_wrpcbb 0x13 #define HMC_tbisasn 0x14 #define HMC_tbivpn 0x19 #define HMC_ret 0x1A #define HMC_wrvpcr 0x29 #define HMC_wrfen 0x2B -#define HMC_kvcpucb 0x2C #define HMC_sflush 0x2F -#define HMC_swpctx 0x30 #define HMC_entervm 0x31 #define HMC_hcall 0x32 #define HMC_tbi 0x33 @@ -160,8 +156,6 @@ __CALL_HMC_R0(rdksp, unsigned long); __CALL_HMC_W1(wrksp, unsigned long); __CALL_HMC_W2(load_mm, unsigned long, unsigned long); -__CALL_HMC_R0(rdpcbb, unsigned long); -__CALL_HMC_W1(wrpcbb, unsigned long); __CALL_HMC_R0(rdptbr, unsigned long); __CALL_HMC_W1(wrptbr, unsigned long); @@ -170,7 +164,6 @@ __CALL_HMC_RW1(swpipl, unsigned long, unsigned long); __CALL_HMC_R0(whami, unsigned long); __CALL_HMC_RW1(rdio64, unsigned long, unsigned long); __CALL_HMC_RW1(rdio32, unsigned int, unsigned long); -__CALL_HMC_R0(kvcpucb, unsigned long); __CALL_HMC_R0(sleepen, unsigned long); __CALL_HMC_R0(mtinten, unsigned long); __CALL_HMC_W2(wrent, void*, unsigned long); -- Gitee From ff736c2476f18590d8828710a459319896a6be15 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Wed, 15 Jun 2022 16:40:51 +0800 Subject: [PATCH 31/77] sw64: fix instruction fault handler Sunway inclusion 
category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNCX -------------------------------- An issue says that when parent traces a child, child does not terminate after signal SIGILL is delivered. This is because BPT/GENTRAP/OPDEC make regs->pc = exc_pc + 4 in hmcode. It used to send SIGILL to child with incorrect regs->pc. As a result, after PTRACE_CONT request is made on child, it will skip the exception instruction and go on. This patch makes `regs->pc = exc_pc` for SIGILL to fix this issue, then restructures code to make it clear. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/traps.c | 64 ++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index a54db1d7045b..f01b88e53ff2 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -31,6 +31,14 @@ #include "proto.h" +enum SW64_IF_TYPES { + IF_BREAKPOINT = 0, + IF_RESERVED, + IF_GENTRAP, + IF_FEN, + IF_OPDEC, +}; + void show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); @@ -155,6 +163,10 @@ do_entArith(unsigned long summary, unsigned long write_mask, force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0); } +/* + * BPT/GENTRAP/OPDEC make regs->pc = exc_pc + 4. debugger should + * do something necessary to handle it correctly. 
+ */ asmlinkage void do_entIF(unsigned long inst_type, struct pt_regs *regs) { @@ -164,35 +176,23 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) type = inst_type & 0xffffffff; inst = inst_type >> 32; - if (!user_mode(regs) && type != 4) { - if (type == 1) { - const unsigned int *data - = (const unsigned int *) regs->pc; - printk("Kernel bug at %s:%d\n", - (const char *)(data[1] | (long)data[2] << 32), - data[0]); - } else if (type == 0) { + if (!user_mode(regs) && type != IF_OPDEC) { + if (type == IF_BREAKPOINT) { /* support kgdb */ notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP); return; } - die((type == 1 ? "Kernel Bug" : "Instruction fault"), + die((type == IF_RESERVED ? "Kernel Bug" : "Instruction fault"), regs, type); } switch (type) { - case 0: /* breakpoint */ - if (ptrace_cancel_bpt(current)) - regs->pc -= 4; /* make pc point to former bpt */ - + case IF_BREAKPOINT: /* gdb do pc-4 for sigtrap */ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0); return; - case 1: /* bugcheck */ - force_sig_fault(SIGTRAP, TRAP_UNK, (void __user *)regs->pc, 0); - return; - - case 2: /* gentrap */ + case IF_GENTRAP: + regs->pc -= 4; switch ((long)regs->r16) { case GEN_INTOVF: signo = SIGFPE; @@ -245,6 +245,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) case GEN_SUBRNG6: case GEN_SUBRNG7: default: + regs->pc += 4; signo = SIGTRAP; code = TRAP_UNK; break; @@ -253,7 +254,11 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) force_sig_fault(signo, code, (void __user *)regs->pc, regs->r16); return; - case 4: /* opDEC */ + case IF_FEN: + fpu_enable(); + return; + + case IF_OPDEC: switch (inst) { case BREAK_KPROBE: if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) @@ -268,26 +273,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; } - if (!user_mode(regs)) + + if (user_mode(regs)) + regs->pc -= 4; + else 
die("Instruction fault", regs, type); break; - case 3: /* FEN fault */ - /* - * Irritating users can call HMC_clrfen to disable the - * FPU for the process. The kernel will then trap to - * save and restore the FP registers. - - * Given that GCC by default generates code that uses the - * FP registers, HMC_clrfen is not useful except for DoS - * attacks. So turn the bleeding FPU back on and be done - * with it. - */ - fpu_enable(); - return; - - case 5: /* illoc */ default: /* unexpected instruction-fault type */ + regs->pc -= 4; break; } -- Gitee From a11aeaf7596912b10867b64306a167dc4a1282f0 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Tue, 9 Aug 2022 17:02:26 +0800 Subject: [PATCH 32/77] sw64: clean up unused single step support in kernel Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNCX -------------------------------- Single step is supported by GDB on sw64, and those unused code can be removed. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/ptrace.h | 1 - arch/sw_64/include/asm/thread_info.h | 3 - arch/sw_64/kernel/proto.h | 4 - arch/sw_64/kernel/ptrace.c | 111 +-------------------------- arch/sw_64/kernel/signal.c | 16 ---- 5 files changed, 2 insertions(+), 133 deletions(-) diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index ac9943015663..c7267b1432dd 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -54,7 +54,6 @@ struct pt_regs { unsigned long r18; }; -#define arch_has_single_step() (1) #define user_mode(regs) (((regs)->ps & 8) != 0) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h index 7cdafaec62e4..31740003d0b2 100644 --- a/arch/sw_64/include/asm/thread_info.h +++ b/arch/sw_64/include/asm/thread_info.h @@ -34,9 +34,6 @@ struct thread_info { int preempt_count; /* 0 => preemptible, 
<0 => BUG */ unsigned int status; /* thread-synchronous flags */ - int bpt_nsaved; - unsigned long bpt_addr[2]; /* breakpoint handling */ - unsigned int bpt_insn[2]; #ifdef CONFIG_DYNAMIC_FTRACE unsigned long dyn_ftrace_addr; #endif diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index f84629ec05ea..8c31eca3cc32 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -7,10 +7,6 @@ #include #include -/* ptrace.c */ -extern int ptrace_set_bpt(struct task_struct *child); -extern int ptrace_cancel_bpt(struct task_struct *child); - /* traps.c */ extern void show_regs(struct pt_regs *regs); extern void die(char *str, struct pt_regs *regs, long err); diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index 51826cdbe9ef..e8b9ec104e3b 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -154,119 +154,12 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) return 0; } -static inline int -read_int(struct task_struct *task, unsigned long addr, int *data) -{ - int copied = access_process_vm(task, addr, data, sizeof(int), FOLL_FORCE); - - return (copied == sizeof(int)) ? 0 : -EIO; -} - -static inline int -write_int(struct task_struct *task, unsigned long addr, int data) -{ - int copied = access_process_vm(task, addr, &data, sizeof(int), - FOLL_FORCE | FOLL_WRITE); - return (copied == sizeof(int)) ? 0 : -EIO; -} - -/* - * Set breakpoint. 
- */ -int -ptrace_set_bpt(struct task_struct *child) -{ - int displ, i, res, reg_b, nsaved = 0; - unsigned int insn, op_code; - unsigned long pc; - - pc = get_reg(child, REG_PC); - res = read_int(child, pc, (int *)&insn); - if (res < 0) - return res; - - op_code = insn >> 26; - /* br bsr beq bne blt ble bgt bge blbc blbs fbeq fbne fblt fble fbgt fbge */ - if ((1UL << op_code) & 0x3fff000000000030UL) { - /* - * It's a branch: instead of trying to figure out - * whether the branch will be taken or not, we'll put - * a breakpoint at either location. This is simpler, - * more reliable, and probably not a whole lot slower - * than the alternative approach of emulating the - * branch (emulation can be tricky for fp branches). - */ - displ = ((s32)(insn << 11)) >> 9; - task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; - if (displ) /* guard against unoptimized code */ - task_thread_info(child)->bpt_addr[nsaved++] - = pc + 4 + displ; - /*call ret jmp*/ - } else if (op_code >= 0x1 && op_code <= 0x3) { - reg_b = (insn >> 16) & 0x1f; - task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b); - } else { - task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; - } - - /* install breakpoints: */ - for (i = 0; i < nsaved; ++i) { - res = read_int(child, task_thread_info(child)->bpt_addr[i], - (int *)&insn); - if (res < 0) - return res; - task_thread_info(child)->bpt_insn[i] = insn; - res = write_int(child, task_thread_info(child)->bpt_addr[i], - BREAKINST); - if (res < 0) - return res; - } - task_thread_info(child)->bpt_nsaved = nsaved; - return 0; -} - /* - * Ensure no single-step breakpoint is pending. Returns non-zero - * value if child was being single-stepped. 
- */ -int -ptrace_cancel_bpt(struct task_struct *child) -{ - int i, nsaved = task_thread_info(child)->bpt_nsaved; - - task_thread_info(child)->bpt_nsaved = 0; - - if (nsaved > 2) { - printk("%s: bogus nsaved: %d!\n", __func__, nsaved); - nsaved = 2; - } - - for (i = 0; i < nsaved; ++i) { - write_int(child, task_thread_info(child)->bpt_addr[i], - task_thread_info(child)->bpt_insn[i]); - } - return (nsaved != 0); -} - -void user_enable_single_step(struct task_struct *child) -{ - /* Mark single stepping. */ - task_thread_info(child)->bpt_nsaved = -1; -} - -void user_disable_single_step(struct task_struct *child) -{ - ptrace_cancel_bpt(child); -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. + * Called by ptrace_detach */ void ptrace_disable(struct task_struct *child) { - user_disable_single_step(child); + /**/ } static int gpr_get(struct task_struct *target, diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index a1edc5300742..5b3664ab6591 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -163,11 +163,6 @@ do_sigreturn(struct sigcontext __user *sc) if (restore_sigcontext(sc, regs)) goto give_sigsegv; - /* Send SIGTRAP if we're single-stepping: */ - if (ptrace_cancel_bpt(current)) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)regs->pc, 0); - } return; give_sigsegv: @@ -194,11 +189,6 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) if (restore_altstack(&frame->uc.uc_stack)) goto give_sigsegv; - /* Send SIGTRAP if we're single-stepping: */ - if (ptrace_cancel_bpt(current)) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)regs->pc, 0); - } return; give_sigsegv: @@ -381,19 +371,15 @@ syscall_restart(unsigned long r0, unsigned long r19, static void do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) { - unsigned long single_stepping = ptrace_cancel_bpt(current); struct ksignal ksig; /* This lets the debugger run, ... 
*/ if (get_signal(&ksig)) { - /* ... so re-check the single stepping. */ - single_stepping |= ptrace_cancel_bpt(current); /* Whee! Actually deliver the signal. */ if (r0) syscall_restart(r0, r19, regs, &ksig.ka); handle_signal(&ksig, regs); } else { - single_stepping |= ptrace_cancel_bpt(current); if (r0) { switch (regs->r0) { case ERESTARTNOHAND: @@ -413,8 +399,6 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) } restore_saved_sigmask(); } - if (single_stepping) - ptrace_set_bpt(current); /* re-set breakpoint */ } void -- Gitee From de1f047987a24beb866edaa1e826a9446f62fc24 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Thu, 11 Aug 2022 09:57:38 +0800 Subject: [PATCH 33/77] sw64: make RO_DATA PAGE_SIZE aligned Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/vmlinux.lds.S b/arch/sw_64/kernel/vmlinux.lds.S index a106be42121f..07bc3d8ee7e4 100644 --- a/arch/sw_64/kernel/vmlinux.lds.S +++ b/arch/sw_64/kernel/vmlinux.lds.S @@ -33,7 +33,7 @@ SECTIONS } :text _etext = .; /* End of text section */ - RO_DATA(4096) + RO_DATA(PAGE_SIZE) /* Will be freed after init */ __init_begin = ALIGN(PAGE_SIZE); -- Gitee From cf93127d8cd8eac660929413317221847ca89f90 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Mon, 15 Aug 2022 17:16:09 +0800 Subject: [PATCH 34/77] sw64: remove trap_a* and hae from pt_regs Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- At present, glibc dumps `trap_a*` when a segmentation fault is caught, but no user knows what they mean. That is, nobody care about them, so remove them to reduce overhead of SAVE_COMMON_REGS. Besides, `hae` is legacy code which should be deprecated too. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/ptrace.h | 7 +------ arch/sw_64/include/uapi/asm/sigcontext.h | 15 +++++++-------- arch/sw_64/kernel/asm-offsets.c | 3 --- arch/sw_64/kernel/entry.S | 8 ++------ arch/sw_64/kernel/ptrace.c | 4 ---- arch/sw_64/kernel/signal.c | 4 ---- 6 files changed, 10 insertions(+), 31 deletions(-) diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index c7267b1432dd..b5afebf82939 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -40,12 +40,7 @@ struct pt_regs { unsigned long r26; unsigned long r27; unsigned long r28; - unsigned long hae; -/* JRP - These are the values provided to a0-a2 by HMcode */ - unsigned long trap_a0; - unsigned long trap_a1; - unsigned long trap_a2; -/* These are saved by HMcode: */ + /* These are saved by HMcode: */ unsigned long ps; unsigned long pc; unsigned long gp; diff --git a/arch/sw_64/include/uapi/asm/sigcontext.h b/arch/sw_64/include/uapi/asm/sigcontext.h index facbf34e920d..11d7eece86ef 100644 --- a/arch/sw_64/include/uapi/asm/sigcontext.h +++ b/arch/sw_64/include/uapi/asm/sigcontext.h @@ -2,15 +2,13 @@ #ifndef _UAPI_ASM_SW64_SIGCONTEXT_H #define _UAPI_ASM_SW64_SIGCONTEXT_H +/* + * Signal context structure + * + * The context is saved before a signal handler is invoked, and it is + * restored by sys_sigreturn / sys_rt_sigreturn. + */ struct sigcontext { - /* - * What should we have here? I'd probably better use the same - * stack layout as DEC Unix, just in case we ever want to try - * running their binaries.. - * - * This is the basic layout, but I don't know if we'll ever - * actually fill in all the values.. 
- */ long sc_onstack; long sc_mask; long sc_pc; @@ -19,6 +17,7 @@ struct sigcontext { long sc_ownedfp; long sc_fpregs[128]; /* SIMD-FP */ unsigned long sc_fpcr; + /* TODO: Following are unused, to be removed and synced with libc */ unsigned long sc_fp_control; unsigned long sc_reserved1, sc_reserved2; unsigned long sc_ssize; diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 56c5daaa413c..86ed5b8c216e 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -88,9 +88,6 @@ void foo(void) DEFINE(PT_REGS_R26, offsetof(struct pt_regs, r26)); DEFINE(PT_REGS_R27, offsetof(struct pt_regs, r27)); DEFINE(PT_REGS_R28, offsetof(struct pt_regs, r28)); - DEFINE(PT_REGS_TRAP_A0, offsetof(struct pt_regs, trap_a0)); - DEFINE(PT_REGS_TRAP_A1, offsetof(struct pt_regs, trap_a1)); - DEFINE(PT_REGS_TRAP_A2, offsetof(struct pt_regs, trap_a2)); DEFINE(PT_REGS_PS, offsetof(struct pt_regs, ps)); DEFINE(PT_REGS_PC, offsetof(struct pt_regs, pc)); DEFINE(PT_REGS_GP, offsetof(struct pt_regs, gp)); diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index 01896128ed23..a52665c3bb08 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -14,11 +14,10 @@ /* * This defines the normal kernel pt-regs layout. * - * regs 9-15 preserved by C code + * regs 9-15 preserved by C code, saving to pt_regs will make + * them easier to be accessed in an unified way. * regs 16-18 saved by HMcode * regs 29-30 saved and set up by HMcode - * JRP - Save regs 16-18 in a special area of the stack, so that - * the hmcode-provided values are available to the signal handler. 
*/ .macro SAVE_COMMON_REGS @@ -42,9 +41,6 @@ stl $25, PT_REGS_R25($sp) stl $26, PT_REGS_R26($sp) stl $27, PT_REGS_R27($sp) - stl $16, PT_REGS_TRAP_A0($sp) - stl $17, PT_REGS_TRAP_A1($sp) - stl $18, PT_REGS_TRAP_A2($sp) .endm .macro RESTORE_COMMON_REGS diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index e8b9ec104e3b..f3bc1020eaff 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -504,10 +504,6 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(r26), REG_OFFSET_NAME(r27), REG_OFFSET_NAME(r28), - REG_OFFSET_NAME(hae), - REG_OFFSET_NAME(trap_a0), - REG_OFFSET_NAME(trap_a1), - REG_OFFSET_NAME(trap_a2), REG_OFFSET_NAME(ps), REG_OFFSET_NAME(pc), REG_OFFSET_NAME(gp), diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index 5b3664ab6591..32c9484d2aa2 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -255,10 +255,6 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, offsetof(struct user_fpsimd_state, fpcr)); err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr); - err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); - err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); - err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2); - return err; } -- Gitee From d4e0c4a8ebcc8551debcd9211f100b6472ef9d59 Mon Sep 17 00:00:00 2001 From: Yang Qiang Date: Mon, 15 Aug 2022 14:38:18 +0800 Subject: [PATCH 35/77] efi: do some cleanups for efi_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Since GRUB has fixed the member's physical address of EFI memory descriptor, there is no need to add stale stuff here. 
Signed-off-by: Yang Qiang Signed-off-by: Gu Zitao --- drivers/firmware/efi/sunway-init.c | 36 +----------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/drivers/firmware/efi/sunway-init.c b/drivers/firmware/efi/sunway-init.c index 9871508df58c..b130218634fb 100644 --- a/drivers/firmware/efi/sunway-init.c +++ b/drivers/firmware/efi/sunway-init.c @@ -25,8 +25,6 @@ #include -extern bool __virt_addr_valid(unsigned long x); - static int __init is_memory(efi_memory_desc_t *md) { if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC)) @@ -128,23 +126,7 @@ static __init int is_usable_memory(efi_memory_desc_t *md) } return false; } -static __initdata char memory_type_name1[][20] = { - "Reserved", - "Loader Code", - "Loader Data", - "Boot Code", - "Boot Data", - "Runtime Code", - "Runtime Data", - "Conventional Memory", - "Unusable Memory", - "ACPI Reclaim Memory", - "ACPI Memory NVS", - "Memory Mapped I/O", - "MMIO Port Space", - "PAL Code", - "Persistent Memory", -}; + static __init void reserve_regions(void) { efi_memory_desc_t *md; @@ -157,22 +139,6 @@ static __init void reserve_regions(void) paddr = md->phys_addr; npages = md->num_pages; - if (!__virt_addr_valid(paddr)) - continue; - - if (md->type >= ARRAY_SIZE(memory_type_name1)) - continue; - - if (md->attribute & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | - EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | - EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | - EFI_MEMORY_NV | - EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) - continue; - - if (strncmp(memory_type_name1[md->type], "Reserved", 8) == 0) - continue; - if (efi_enabled(EFI_DBG)) { char buf[64]; -- Gitee From fca1989f78a93d742956ea19c91025e8edc623b7 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Fri, 12 Aug 2022 15:24:06 +0800 Subject: [PATCH 36/77] sw64: optimize instruction usage in fork routine Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNDF 
-------------------------------- Use 'call' instead of 'jmp' in ret_from_fork(), so the 'ret' in schedule_tail() won't mess up branch prediction. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index a52665c3bb08..67bafd4a930a 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -426,7 +426,7 @@ __switch_to: .ent ret_from_fork ret_from_fork: ldi $26, ret_from_sys_call - jmp $31, schedule_tail + call $31, schedule_tail .end ret_from_fork -- Gitee From 9a8c09fcadb2d225a4222df7d308a23cf78591ad Mon Sep 17 00:00:00 2001 From: He Sheng Date: Wed, 17 Aug 2022 10:05:26 +0800 Subject: [PATCH 37/77] sw64: ensure IRQs are off when switch/load/activate mm context Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNEN -------------------------------- This is because of commit f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler") in which switch_mm_irqs_off() is called by the scheduler, vs switch_mm() which is used by use_mm(). This patch mirrors the x86 code, i.e. it disables interrupts in switch_mm(), and optimises the scheduler case by defining switch_mm_irqs_off(). After that, the asn_lock and need_new_asn in cpu_data are no longer needed. This patch also moves __load_new_mm_context() into flush_tlb_current() and makes sure IRQs are off over it. 
Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hw_init.h | 2 -- arch/sw_64/include/asm/mmu_context.h | 48 ++++++++-------------------- arch/sw_64/include/asm/switch_to.h | 1 - arch/sw_64/include/asm/tlbflush.h | 21 +++++++++--- arch/sw_64/kernel/setup.c | 2 -- arch/sw_64/kernel/smp.c | 2 -- arch/sw_64/mm/fault.c | 16 ---------- 7 files changed, 31 insertions(+), 61 deletions(-) diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index 545e9a99a49c..a36c811839ea 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -20,8 +20,6 @@ struct cache_desc { struct cpuinfo_sw64 { unsigned long loops_per_jiffy; unsigned long last_asn; - int need_new_asn; - int asn_lock; unsigned long ipi_count; struct cache_desc icache; /* Primary I-cache */ struct cache_desc dcache; /* Primary D or combined I/D cache */ diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index 10199db1d637..84e84048a3ba 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -13,10 +13,9 @@ #include /* - * Force a context reload. This is needed when we change the page - * table pointer or when we update the ASID of the current process. + * Load a mm context. This is needed when we change the page + * table pointer(CSR:PTBR) or when we update the ASID. * - * CSR:UPN holds ASID and CSR:PTBR holds page table pointer. */ #define load_asn_ptbr load_mm @@ -69,17 +68,13 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) } static inline void -switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, - struct task_struct *next) +switch_mm_irqs_off(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) { /* Check if our ASN is of an older version, and thus invalid. 
*/ unsigned long asn, mmc, ptbr; long cpu = smp_processor_id(); -#ifdef CONFIG_SMP - cpu_data[cpu].asn_lock = 1; - barrier(); -#endif asn = cpu_last_asn(cpu); mmc = next_mm->context.asid[cpu]; if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { @@ -87,10 +82,6 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, mmc = __get_new_mm_context(next_mm, cpu); next_mm->context.asid[cpu] = mmc; } -#ifdef CONFIG_SMP - else - cpu_data[cpu].need_new_asn = 1; -#endif /* * Update CSR:UPN and CSR:PTBR. Another thread may have allocated @@ -102,31 +93,20 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, load_asn_ptbr(asn, ptbr); } -extern void __load_new_mm_context(struct mm_struct *); - -#ifdef CONFIG_SMP -#define check_mmu_context() \ -do { \ - int cpu = smp_processor_id(); \ - cpu_data[cpu].asn_lock = 0; \ - barrier(); \ - if (cpu_data[cpu].need_new_asn) { \ - struct mm_struct *mm = current->active_mm; \ - cpu_data[cpu].need_new_asn = 0; \ - if (!mm->context.asid[cpu]) \ - __load_new_mm_context(mm); \ - } \ -} while (0) -#else -#define check_mmu_context() do { } while (0) -#endif +#define switch_mm_irqs_off switch_mm_irqs_off -static inline void activate_mm(struct mm_struct *prev_mm, - struct mm_struct *next_mm) +static inline void +switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *tsk) { - __load_new_mm_context(next_mm); + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev_mm, next_mm, tsk); + local_irq_restore(flags); } +#define activate_mm(prev, next) switch_mm(prev, next, current) #define deactivate_mm(tsk, mm) do { } while (0) static inline int init_new_context(struct task_struct *tsk, diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h index 967fe1d680da..e5596a735b2d 100644 --- a/arch/sw_64/include/asm/switch_to.h +++ b/arch/sw_64/include/asm/switch_to.h @@ -48,7 +48,6 @@ do { \ struct task_struct *__next = (next); \ __switch_to_aux(__prev, __next); \ (last) = 
__switch_to(__prev, __next); \ - check_mmu_context(); \ } while (0) diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index e508a4d66d37..b35af83e6ec2 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -8,13 +8,26 @@ #include #include #include - -extern void __load_new_mm_context(struct mm_struct *); - +#include static inline void flush_tlb_current(struct mm_struct *mm) { - __load_new_mm_context(mm); + unsigned long mmc, asn, ptbr, flags; + + local_irq_save(flags); + + mmc = __get_new_mm_context(mm, smp_processor_id()); + mm->context.asid[smp_processor_id()] = mmc; + + /* + * Force a new ASN for a task. Note that there is no way to + * write UPN only now, so call load_asn_ptbr here. + */ + asn = mmc & HARDWARE_ASN_MASK; + ptbr = virt_to_pfn(mm->pgd); + load_asn_ptbr(asn, ptbr); + + local_irq_restore(flags); } /* diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 39103e4edee4..cb04aaa0cb97 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -145,8 +145,6 @@ void store_cpu_data(int cpu) { cpu_data[cpu].loops_per_jiffy = loops_per_jiffy; cpu_data[cpu].last_asn = ASN_FIRST_VERSION; - cpu_data[cpu].need_new_asn = 0; - cpu_data[cpu].asn_lock = 0; } #ifdef CONFIG_KEXEC diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 8f752c604db0..b66608c4934b 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -499,8 +499,6 @@ void flush_tlb_all(void) on_each_cpu(ipi_flush_tlb_all, NULL, 1); } -#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) - static void ipi_flush_tlb_mm(void *x) { struct mm_struct *mm = (struct mm_struct *) x; diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index 126752771b11..574fe7930aac 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -61,22 +61,6 @@ void show_all_vma(void) } } -/* - * Force a new ASN for a task. 
- */ -void __load_new_mm_context(struct mm_struct *next_mm) -{ - unsigned long mmc, asn, ptbr; - - mmc = __get_new_mm_context(next_mm, smp_processor_id()); - next_mm->context.asid[smp_processor_id()] = mmc; - - asn = mmc & HARDWARE_ASN_MASK; - ptbr = virt_to_pfn(next_mm->pgd); - - load_asn_ptbr(asn, ptbr); -} - /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to handle_mm_fault(). -- Gitee From 8f4d572d2ab467c1820193ebd2ee78f9f2cf7b0c Mon Sep 17 00:00:00 2001 From: He Sheng Date: Fri, 19 Aug 2022 16:42:27 +0800 Subject: [PATCH 38/77] sw64: init sp with kernel stack top for idle thread Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNF1 -------------------------------- For secondary CPU, it has to do ($sp - THREAD_SIZE) to find current thread_info. Signed-off-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/asm-offsets.c | 1 + arch/sw_64/kernel/head.S | 7 +++---- arch/sw_64/kernel/smp.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 86ed5b8c216e..12b3311c1bcb 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -218,4 +218,5 @@ void foo(void) OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]); OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]); BLANK(); + DEFINE(ASM_THREAD_SIZE, THREAD_SIZE); } diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S index 4b2db3891dcb..3dfb95c91d70 100644 --- a/arch/sw_64/kernel/head.S +++ b/arch/sw_64/kernel/head.S @@ -24,7 +24,7 @@ __start: /* We need to get current_task_info loaded up... */ ldi $8, init_thread_union /* ... and find our stack ... */ - ldi $30, 0x4000 - PT_REGS_SIZE($8) + ldi $30, ASM_THREAD_SIZE($8) /* ... and then we can clear bss data. 
*/ ldi $2, __bss_start ldi $3, __bss_stop @@ -51,7 +51,7 @@ __start: ldl $29, 0($30) addl $29, $0, $29 /* Repoint the sp into the new kernel image */ - ldi $30, 0x4000 - PT_REGS_SIZE($8) + ldi $30, ASM_THREAD_SIZE($8) #endif /* ... and then we can start the kernel. */ call $26, sw64_start_kernel @@ -84,8 +84,7 @@ __smp_callin: s8addl $0, $2, $2 ldl $30, 0($2) # Get ksp of idle thread - ldi $8, 0x3fff # Find "current". - bic $30, $8, $8 + ldi $8, -ASM_THREAD_SIZE($30) # Find "current" call $26, smp_callin sys_call HMC_halt diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index b66608c4934b..f74b172ecb25 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -159,7 +159,7 @@ static int secondary_cpu_start(int cpuid, struct task_struct *idle) /* * Precalculate the target ksp. */ - tidle_ksp[cpuid] = idle->stack + sizeof(union thread_union) - 16; + tidle_ksp[cpuid] = idle->stack + THREAD_SIZE; DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state); -- Gitee From 1c379cae0198df499ddd2f27b3c28d12af956d4c Mon Sep 17 00:00:00 2001 From: Min Fanlei Date: Tue, 23 Aug 2022 09:18:11 +0800 Subject: [PATCH 39/77] sw64: kvm: fix wrong info print of KVM_MEMHOTPLUG Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56WV8 -------------------------------- Due to the registration of guest IO address, there is wrong pr_info of "KVM MEMHOTPLUG support" when booting the guest, so we fix it. 
Signed-off-by: Min Fanlei Signed-off-by: Gu Zitao --- arch/sw_64/kvm/kvm-sw64.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 9d209141820c..06e969caaaa6 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -308,6 +308,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE) return 0; + if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) + return 0; + + if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr))) + return 0; + #ifndef CONFIG_KVM_MEMHOTPLUG if (mem->guest_phys_addr) { pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__); @@ -315,12 +321,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, #endif - if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) - return 0; - - if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr))) - return 0; - if (!sw64_kvm_pool) return -ENOMEM; -- Gitee From 32721d3e5566cdd214d5e85109e5e5ccd2592a98 Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Wed, 24 Aug 2022 09:56:54 +0800 Subject: [PATCH 40/77] sw64: perf: fix perf_get_regs_user Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56X48 -------------------------------- The past implementation of perf_get_regs_user could not get regs by default, but the user registers are actually available via task_pt_regs(). Fix it now. 
Signed-off-by: He Chuyue Signed-off-by: Gu Zitao --- arch/sw_64/kernel/perf_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/kernel/perf_regs.c b/arch/sw_64/kernel/perf_regs.c index 4c12a2cdf912..b036f213936b 100644 --- a/arch/sw_64/kernel/perf_regs.c +++ b/arch/sw_64/kernel/perf_regs.c @@ -28,6 +28,6 @@ u64 perf_reg_abi(struct task_struct *task) void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs) { - regs_user->regs = NULL; - regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); } -- Gitee From df00532d7039f7f36c0d227ac50c41e5c533e3ab Mon Sep 17 00:00:00 2001 From: Hang Xiaoqian Date: Wed, 24 Aug 2022 17:32:11 +0800 Subject: [PATCH 41/77] sw64: switch to generic calibrate_delay() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- There is no need to use the per-CPU loops_per_jiffy, so we remove loops_per_jiffy from struct cpuinfo_sw64. 
Signed-off-by: Hang Xiaoqian Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 4 ++++ arch/sw_64/include/asm/hw_init.h | 1 - arch/sw_64/kernel/setup.c | 1 - arch/sw_64/kernel/smp.c | 12 +----------- arch/sw_64/kernel/time.c | 10 ++-------- arch/sw_64/lib/udelay.c | 6 ------ drivers/cpufreq/sw64_cpufreq.c | 4 +--- 7 files changed, 8 insertions(+), 30 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 392f7806afcb..36cddefb1ad4 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -142,6 +142,10 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_GPIO bool +config GENERIC_CALIBRATE_DELAY + bool + default y + config ZONE_DMA32 bool default y diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index a36c811839ea..e0a5706710cd 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -18,7 +18,6 @@ struct cache_desc { }; struct cpuinfo_sw64 { - unsigned long loops_per_jiffy; unsigned long last_asn; unsigned long ipi_count; struct cache_desc icache; /* Primary I-cache */ diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index cb04aaa0cb97..2d2a8c6d4b4e 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -143,7 +143,6 @@ EXPORT_SYMBOL(screen_info); */ void store_cpu_data(int cpu) { - cpu_data[cpu].loops_per_jiffy = loops_per_jiffy; cpu_data[cpu].last_asn = ASN_FIRST_VERSION; } diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index f74b172ecb25..c0936d119c4e 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -374,18 +374,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) void __init native_smp_cpus_done(unsigned int max_cpus) { - int cpu; - unsigned long bogosum = 0; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_online(cpu)) - bogosum += cpu_data[cpu].loops_per_jiffy; - smp_booted = 1; - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), - (bogosum + 2500) / 
(500000/HZ), - ((bogosum + 2500) / (5000/HZ)) % 100); + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); } int setup_profiling_timer(unsigned int multiplier) diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index 6a4c8a31465c..3aa55c886e38 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -111,14 +111,8 @@ time_init(void) of_clk_init(NULL); /* Startup the timer source. */ setup_timer(); -} - -void calibrate_delay(void) -{ - loops_per_jiffy = get_cpu_freq() / HZ; - pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy / (500000 / HZ), - (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + /* Calibrate the delay loop directly */ + lpj_fine = cycle_freq / HZ; } static void __init calibrate_sched_clock(void) diff --git a/arch/sw_64/lib/udelay.c b/arch/sw_64/lib/udelay.c index 48356ab8872f..59ca8a97d748 100644 --- a/arch/sw_64/lib/udelay.c +++ b/arch/sw_64/lib/udelay.c @@ -28,12 +28,6 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -#ifdef CONFIG_SMP -#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy -#else -#define LPJ loops_per_jiffy -#endif - void udelay(unsigned long usecs) { unsigned long loops = usecs * get_cpu_freq() / 1000000; diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c index b8cd37dac0d9..71f944de934b 100644 --- a/drivers/cpufreq/sw64_cpufreq.c +++ b/drivers/cpufreq/sw64_cpufreq.c @@ -40,10 +40,8 @@ static int sw64_cpu_freq_notifier(struct notifier_block *nb, unsigned long cpu; for_each_online_cpu(cpu) { - if (val == CPUFREQ_POSTCHANGE) { + if (val == CPUFREQ_POSTCHANGE) sw64_update_clockevents(cpu, freqs->new * 1000); - current_cpu_data.loops_per_jiffy = loops_per_jiffy; - } } return 0; -- Gitee From c4cf764161580a2b12f10af6189d6617ab92f1b8 Mon Sep 17 00:00:00 2001 From: Dai Xin Date: Wed, 24 Aug 2022 16:38:29 +0000 Subject: [PATCH 42/77] sw64: lib: fix __iowrite64_copy() Sunway inclusion category: feature bugzilla: 
https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- The mb() is supposed to be inside the loop, as is done in __iowrite32_copy(). Besides, CONFIG_64BIT is always true for SW64, so remove the redundant #if statement. Signed-off-by: Dai Xin Signed-off-by: Gu Zitao --- arch/sw_64/lib/iomap_copy.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/sw_64/lib/iomap_copy.c b/arch/sw_64/lib/iomap_copy.c index 10e756fffff5..1c75bd602d7e 100644 --- a/arch/sw_64/lib/iomap_copy.c +++ b/arch/sw_64/lib/iomap_copy.c @@ -41,15 +41,12 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { -#ifdef CONFIG_64BIT u64 __iomem *dst = to; const u64 *src = from; const u64 *end = src + count; - while (src < end) + while (src < end) { __raw_writeq(*src++, dst++); mb(); -#else - __iowrite32_copy(to, from, count * 2); -#endif + } } -- Gitee From de2feaab55d9964e993538e7ce3a84467283b886 Mon Sep 17 00:00:00 2001 From: Tang Jinyang Date: Wed, 24 Aug 2022 10:52:49 +0800 Subject: [PATCH 43/77] sw64: fix get_cpu_freq() bug Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56QDM -------------------------------- The cpu_desc.frequency used to be unmodified in the presence of CPU frequency scaling, and get_cpu_freq() always returned the initial value. It has to be kept consistent with the target frequency. 
Signed-off-by: Tang Jinyang Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/clock.h | 2 +- arch/sw_64/include/asm/hw_init.h | 7 +++++++ arch/sw_64/kernel/clock.c | 6 +++--- drivers/cpufreq/sw64_cpufreq.c | 6 +++--- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h index 30983e8e7cc7..88714eb08507 100644 --- a/arch/sw_64/include/asm/clock.h +++ b/arch/sw_64/include/asm/clock.h @@ -44,7 +44,7 @@ struct clk { int clk_init(void); -int sw64_set_rate(int index, unsigned long rate); +void sw64_set_rate(unsigned long rate); struct clk *sw64_clk_get(struct device *dev, const char *id); diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index e0a5706710cd..8a28aac2e54f 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -82,6 +82,13 @@ static inline unsigned long get_cpu_freq(void) return cpu_desc.frequency; } +static inline void update_cpu_freq(unsigned long freq) +{ + freq = freq * 1000000; + if (cpu_desc.frequency != freq) + cpu_desc.frequency = freq; +} + #define EMUL_FLAG (0x1UL << 63) #define MMSIZE_MASK (EMUL_FLAG - 1) diff --git a/arch/sw_64/kernel/clock.c b/arch/sw_64/kernel/clock.c index aa22e9550e29..32f01d4b8255 100644 --- a/arch/sw_64/kernel/clock.c +++ b/arch/sw_64/kernel/clock.c @@ -131,15 +131,17 @@ void sw64_store_policy(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(sw64_store_policy); -int sw64_set_rate(int index, unsigned long rate) +void sw64_set_rate(unsigned long rate) { unsigned int i, val; + int index = -1; rate /= 1000000; for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { if (rate == cpu_freq[i]) { index = i; + update_cpu_freq(cpu_freq[i]); break; } } @@ -185,7 +187,5 @@ int sw64_set_rate(int index, unsigned long rate) /* LV1 select PLL0/PLL1 */ sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); - - return index; } 
EXPORT_SYMBOL_GPL(sw64_set_rate); diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c index 71f944de934b..819d8f1437e2 100644 --- a/drivers/cpufreq/sw64_cpufreq.c +++ b/drivers/cpufreq/sw64_cpufreq.c @@ -68,12 +68,12 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy, { unsigned long freq; - freq = (get_cpu_freq() / 1000) * index / 48; + freq = 50000 * index; sw64_store_policy(policy); /* setting the cpu frequency */ - sw64_set_rate(-1, freq * 1000); + sw64_set_rate(freq * 1000); return 0; } @@ -98,7 +98,7 @@ static int sw64_cpufreq_cpu_init(struct cpufreq_policy *policy) if (sw64_clockmod_table[i].frequency == 0) sw64_clockmod_table[i].frequency = (rate * i) / 48; - sw64_set_rate(-1, rate * 1000); + sw64_set_rate(rate * 1000); policy->clk = cpuclk; -- Gitee From 552045fbd9ea229db4611dc5226a86701486e643 Mon Sep 17 00:00:00 2001 From: Zhou Xuemei Date: Thu, 25 Aug 2022 14:15:08 +0800 Subject: [PATCH 44/77] sw64: pcie: fix piu configuration to ensure data correctness Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNFK -------------------------------- If a device does not use msi but uses polling as the completion flag of DMAW events, the current PIUCONFIG0 register configuration cannot guarantee completion order of DMAW. It may happen that the completion flag is polled before the data has been written to memory, causing users to access incorrect data. To ensure data correctness, DMAW order on PIU should be controlled. That is, a DMAW request with Relaxed Ordering off has to wait until the previous write request receives a response before being submitted. This will significantly degrade DMAW performance for devices without Relaxed Ordering capability. 
Signed-off-by: Zhou Xuemei Signed-off-by: Gu Zitao --- arch/sw_64/chip/chip3/chip.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 8697891e3930..105389d5989f 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -393,7 +393,6 @@ static void chip3_set_rc_piu(unsigned long node, unsigned long index) /* set DMA offset value PCITODMA_OFFSET */ write_piu_ior0(node, index, EPDMABAR, PCITODMA_OFFSET); if (IS_ENABLED(CONFIG_PCI_MSI)) { - write_piu_ior0(node, index, PIUCONFIG0, 0x38076); write_piu_ior0(node, index, MSIADDR, MSIX_MSG_ADDR); for (i = 0; i < 256; i++) write_piu_ior0(node, index, MSICONFIG0 + (i << 7), 0); -- Gitee From 671df871864946e5f7bf8392938d5028e7a5f635 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 29 Aug 2022 17:33:32 +0800 Subject: [PATCH 45/77] sw64: bpf: fix ebpf jit compiler Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNGJ -------------------------------- This patch makes the following changes to the eBPF JIT compiler: * implement proper XADD instructions * implement 32-bit ARSH instructions * implement DIV and MOD instructions using helper functions * reorganize header file to make it easier to read * optimize load immediate helper functions * make sure ILLEGAL_INSN will throw instruction fault * make sure fields in JITed instructions won't overflow * restore GP register on exit * make sure 32-bit alu functions are unsigned * make sure 32-bit results are zero extended to 64 bits * make sure function addresses are stored in $27 so callee can calculate GP correctly * track free temporary registers to make sure we won't accidentally clobber useful data * fix register mapping * fix host to BE (big-endian) conversion algorithm * fix offset calculation of branch instructions * fix tail call Result of "test_bpf.ko": 378 PASSED, 0 FAILED, [366/366 JIT'ed] Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit.h | 297 
+++++----- arch/sw_64/net/bpf_jit_comp.c | 1007 +++++++++++++++++++++++---------- 2 files changed, 871 insertions(+), 433 deletions(-) diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h index 2bf3ca6f3abd..e4c96995bd96 100644 --- a/arch/sw_64/net/bpf_jit.h +++ b/arch/sw_64/net/bpf_jit.h @@ -21,80 +21,82 @@ #ifndef _SW64_BPF_JIT_H #define _SW64_BPF_JIT_H +/* SW64 instruction field shift */ #define SW64_BPF_OPCODE_OFFSET 26 #define SW64_BPF_RA_OFFSET 21 #define SW64_BPF_RB_OFFSET 16 #define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13 #define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5 #define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0 +#define SW64_BPF_LS_FUNC_OFFSET 12 -#define SW64_BPF_OPCODE_BR_CALL 0x01 -#define SW64_BPF_OPCODE_BR_RET 0x02 -#define SW64_BPF_OPCODE_BR_JMP 0x03 -#define SW64_BPF_OPCODE_BR_BR 0x04 -#define SW64_BPF_OPCODE_BR_BSR 0x05 -#define SW64_BPF_OPCODE_BR_BEQ 0x30 -#define SW64_BPF_OPCODE_BR_BNE 0x31 -#define SW64_BPF_OPCODE_BR_BLT 0x32 -#define SW64_BPF_OPCODE_BR_BLE 0x33 -#define SW64_BPF_OPCODE_BR_BGT 0x34 -#define SW64_BPF_OPCODE_BR_BGE 0x35 -#define SW64_BPF_OPCODE_BR_BLBC 0x36 -#define SW64_BPF_OPCODE_BR_BLBS 0x37 - -#define SW64_BPF_OPCODE_LS_LDBU 0x20 -#define SW64_BPF_OPCODE_LS_LDHU 0x21 -#define SW64_BPF_OPCODE_LS_LDW 0x22 -#define SW64_BPF_OPCODE_LS_LDL 0x23 -#define SW64_BPF_OPCODE_LS_STB 0x28 -#define SW64_BPF_OPCODE_LS_STH 0x29 -#define SW64_BPF_OPCODE_LS_STW 0x2A -#define SW64_BPF_OPCODE_LS_STL 0x2B -#define SW64_BPF_OPCODE_LS_LDI 0x3E -#define SW64_BPF_OPCODE_LS_LDIH 0x3F - +/* SW64 instruction opcodes */ +#define SW64_BPF_OPCODE_CALL 0x01 +#define SW64_BPF_OPCODE_RET 0x02 +#define SW64_BPF_OPCODE_JMP 0x03 +#define SW64_BPF_OPCODE_BR 0x04 +#define SW64_BPF_OPCODE_BSR 0x05 +#define SW64_BPF_OPCODE_MISC 0x06 +#define SW64_BPF_OPCODE_LOCK 0x08 #define SW64_BPF_OPCODE_ALU_REG 0x10 #define SW64_BPF_OPCODE_ALU_IMM 0x12 +#define SW64_BPF_OPCODE_LDBU 0x20 +#define SW64_BPF_OPCODE_LDHU 0x21 +#define SW64_BPF_OPCODE_LDW 0x22 +#define SW64_BPF_OPCODE_LDL 
0x23 +#define SW64_BPF_OPCODE_STB 0x28 +#define SW64_BPF_OPCODE_STH 0x29 +#define SW64_BPF_OPCODE_STW 0x2A +#define SW64_BPF_OPCODE_STL 0x2B +#define SW64_BPF_OPCODE_BEQ 0x30 +#define SW64_BPF_OPCODE_BNE 0x31 +#define SW64_BPF_OPCODE_BLT 0x32 +#define SW64_BPF_OPCODE_BLE 0x33 +#define SW64_BPF_OPCODE_BGT 0x34 +#define SW64_BPF_OPCODE_BGE 0x35 +#define SW64_BPF_OPCODE_BLBC 0x36 +#define SW64_BPF_OPCODE_BLBS 0x37 +#define SW64_BPF_OPCODE_LDI 0x3E +#define SW64_BPF_OPCODE_LDIH 0x3F + +/* SW64 MISC instructions function codes */ +#define SW64_BPF_FUNC_MISC_RD_F 0x1000 +#define SW64_BPF_FUNC_MISC_WR_F 0x1020 +/* SW64 LOCK instructions function codes */ +#define SW64_BPF_FUNC_LOCK_LLDW 0x0 +#define SW64_BPF_FUNC_LOCK_LLDL 0x1 +#define SW64_BPF_FUNC_LOCK_LSTW 0x8 +#define SW64_BPF_FUNC_LOCK_LSTL 0x9 + +/* SW64 ALU instructions function codes */ #define SW64_BPF_FUNC_ALU_ADDW 0x00 #define SW64_BPF_FUNC_ALU_SUBW 0x01 #define SW64_BPF_FUNC_ALU_ADDL 0x08 #define SW64_BPF_FUNC_ALU_SUBL 0x09 #define SW64_BPF_FUNC_ALU_MULW 0x10 #define SW64_BPF_FUNC_ALU_MULL 0x18 +#define SW64_BPF_FUNC_ALU_CMPEQ 0x28 +#define SW64_BPF_FUNC_ALU_CMPLT 0x29 +#define SW64_BPF_FUNC_ALU_CMPLE 0x2A +#define SW64_BPF_FUNC_ALU_CMPULT 0x2B +#define SW64_BPF_FUNC_ALU_CMPULE 0x2C +#define SW64_BPF_FUNC_ALU_AND 0x38 +#define SW64_BPF_FUNC_ALU_BIC 0x39 +#define SW64_BPF_FUNC_ALU_BIS 0x3A +#define SW64_BPF_FUNC_ALU_ORNOT 0x3B +#define SW64_BPF_FUNC_ALU_XOR 0x3C +#define SW64_BPF_FUNC_ALU_EQV 0x3D +#define SW64_BPF_FUNC_ALU_SLL 0x48 +#define SW64_BPF_FUNC_ALU_SRL 0x49 +#define SW64_BPF_FUNC_ALU_SRA 0x4A #define SW64_BPF_FUNC_ALU_ZAP 0x68 #define SW64_BPF_FUNC_ALU_ZAPNOT 0x69 #define SW64_BPF_FUNC_ALU_SEXTB 0x6A #define SW64_BPF_FUNC_ALU_SEXTH 0x6B -#define SW64_BPF_OPCODE_BS_REG 0x10 -#define SW64_BPF_OPCODE_BS_IMM 0x12 - -#define SW64_BPF_FUNC_BS_SLL 0x48 -#define SW64_BPF_FUNC_BS_SRL 0x49 -#define SW64_BPF_FUNC_BS_SRA 0x4A - -#define SW64_BPF_OPCODE_LOGIC_REG 0x10 -#define SW64_BPF_OPCODE_LOGIC_IMM 0x12 - 
-#define SW64_BPF_FUNC_LOGIC_AND 0x38 -#define SW64_BPF_FUNC_LOGIC_BIC 0x39 -#define SW64_BPF_FUNC_LOGIC_BIS 0x3A -#define SW64_BPF_FUNC_LOGIC_ORNOT 0x3B -#define SW64_BPF_FUNC_LOGIC_XOR 0x3C -#define SW64_BPF_FUNC_LOGIC_EQV 0x3D - -#define SW64_BPF_OPCODE_CMP_REG 0x10 -#define SW64_BPF_OPCODE_CMP_IMM 0x12 - -#define SW64_BPF_FUNC_CMP_EQ 0x28 -#define SW64_BPF_FUNC_CMP_LT 0x29 -#define SW64_BPF_FUNC_CMP_LE 0x2A -#define SW64_BPF_FUNC_CMP_ULT 0x2B -#define SW64_BPF_FUNC_CMP_ULE 0x2C - /* special instuction used in jit_fill_hole() */ -#define SW64_BPF_ILLEGAL_INSN ((1 << 25) | 0x80) +#define SW64_BPF_ILLEGAL_INSN (0x1bff1000) /* rd_f $31 */ enum sw64_bpf_registers { SW64_BPF_REG_V0 = 0, /* keep return value */ @@ -135,25 +137,45 @@ enum sw64_bpf_registers { /* SW64 load and store instructions */ #define SW64_BPF_LDBU(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDBU, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDBU, dst, rb, offset16) #define SW64_BPF_LDHU(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDHU, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDHU, dst, rb, offset16) #define SW64_BPF_LDW(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDW, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDW, dst, rb, offset16) #define SW64_BPF_LDL(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDL, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDL, dst, rb, offset16) #define SW64_BPF_STB(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STB, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STB, src, rb, offset16) #define SW64_BPF_STH(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STH, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STH, src, rb, offset16) #define SW64_BPF_STW(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STW, src, rb, offset16) + 
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STW, src, rb, offset16) #define SW64_BPF_STL(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STL, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STL, src, rb, offset16) #define SW64_BPF_LDI(dst, rb, imm16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDI, dst, rb, imm16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDI, dst, rb, imm16) #define SW64_BPF_LDIH(dst, rb, imm16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDIH, dst, rb, imm16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDIH, dst, rb, imm16) + +/* SW64 lock instructions */ +#define SW64_BPF_LLDW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDW) +#define SW64_BPF_LLDL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDL) +#define SW64_BPF_LSTW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTW) +#define SW64_BPF_LSTL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTL) +#define SW64_BPF_RD_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_RD_F) +#define SW64_BPF_WR_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_WR_F) /* SW64 ALU instructions REG format */ #define SW64_BPF_ADDW_REG(ra, rb, dst) \ @@ -182,10 +204,10 @@ enum sw64_bpf_registers { ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT) #define SW64_BPF_SEXTB_REG(rb, dst) \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ - 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) #define SW64_BPF_SEXTH_REG(rb, dst) \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ - 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) /* SW64 ALU 
instructions IMM format */ #define SW64_BPF_ADDW_IMM(ra, imm8, dst) \ @@ -214,130 +236,133 @@ enum sw64_bpf_registers { ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT) #define SW64_BPF_SEXTB_IMM(imm8, dst) \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ - 0, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) +#define SW64_BPF_SEXTH_IMM(imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTH) /* SW64 bit shift instructions REG format */ #define SW64_BPF_SLL_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SLL) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SLL) #define SW64_BPF_SRL_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SRL) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRL) #define SW64_BPF_SRA_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SRA) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRA) /* SW64 bit shift instructions IMM format */ #define SW64_BPF_SLL_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SLL) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SLL) #define SW64_BPF_SRL_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SRL) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRL) #define SW64_BPF_SRA_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SRA) + 
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRA) /* SW64 control instructions */ #define SW64_BPF_CALL(ra, rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_CALL, ra, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_CALL, ra, rb, 0) #define SW64_BPF_RET(rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_RET, SW64_BPF_REG_ZR, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_RET, SW64_BPF_REG_ZR, rb, 0) #define SW64_BPF_JMP(ra, rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_JMP, ra, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_JMP, ra, rb, 0) #define SW64_BPF_BR(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BR, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR, ra, offset) #define SW64_BPF_BSR(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BSR, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BSR, ra, offset) #define SW64_BPF_BEQ(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BEQ, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BEQ, ra, offset) #define SW64_BPF_BNE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BNE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BNE, ra, offset) #define SW64_BPF_BLT(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLT, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLT, ra, offset) #define SW64_BPF_BLE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLE, ra, offset) #define SW64_BPF_BGT(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGT, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGT, ra, offset) #define SW64_BPF_BGE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGE, ra, offset) #define SW64_BPF_BLBC(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBC, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBC, ra, offset) #define 
SW64_BPF_BLBS(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBS, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBS, ra, offset) /* SW64 bit logic instructions REG format */ #define SW64_BPF_AND_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_AND) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_AND) #define SW64_BPF_ANDNOT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIC) -#define SW64_BPF_OR_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIS) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIS) #define SW64_BPF_ORNOT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_ORNOT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ORNOT) #define SW64_BPF_XOR_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_XOR) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_XOR) #define SW64_BPF_EQV_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_EQV) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_EQV) /* SW64 bit logic instructions IMM format */ #define SW64_BPF_AND_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_AND) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, 
imm8, dst, SW64_BPF_FUNC_ALU_AND) #define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIC) -#define SW64_BPF_OR_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIS) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIS) #define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_ORNOT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ORNOT) #define SW64_BPF_XOR_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_XOR) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_XOR) #define SW64_BPF_EQV_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_EQV) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_EQV) /* SW64 compare instructions REG format */ #define SW64_BPF_CMPEQ_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_EQ) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPEQ) #define SW64_BPF_CMPLT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_LT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLT) #define SW64_BPF_CMPLE_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ 
- ra, rb, dst, SW64_BPF_FUNC_CMP_LE) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLE) #define SW64_BPF_CMPULT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_ULT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULT) #define SW64_BPF_CMPULE_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_ULE) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULE) /* SW64 compare instructions imm format */ #define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_EQ) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPEQ) #define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_LT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLT) #define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_LE) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLE) #define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_ULT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULT) #define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_ULE) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE) #endif /* _SW64_BPF_JIT_H */ diff --git 
a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index 102de82d69e1..f1e471a0789b 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -29,44 +29,34 @@ #include "bpf_jit.h" -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) -#define TCALL_CNT (MAX_BPF_JIT_REG + 2) - -/* - * TO-DO List: - * DIV - * MOD - */ +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) static const int bpf2sw64[] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = SW64_BPF_REG_V0, /* arguments from eBPF program to in-kernel function */ - [BPF_REG_1] = SW64_BPF_REG_A1, - [BPF_REG_2] = SW64_BPF_REG_A2, - [BPF_REG_3] = SW64_BPF_REG_A3, - [BPF_REG_4] = SW64_BPF_REG_A4, - [BPF_REG_5] = SW64_BPF_REG_A5, + [BPF_REG_1] = SW64_BPF_REG_A0, + [BPF_REG_2] = SW64_BPF_REG_A1, + [BPF_REG_3] = SW64_BPF_REG_A2, + [BPF_REG_4] = SW64_BPF_REG_A3, + [BPF_REG_5] = SW64_BPF_REG_A4, /* callee saved registers that in-kernel function will preserve */ - [BPF_REG_6] = SW64_BPF_REG_S1, - [BPF_REG_7] = SW64_BPF_REG_S2, - [BPF_REG_8] = SW64_BPF_REG_S3, - [BPF_REG_9] = SW64_BPF_REG_S4, + [BPF_REG_6] = SW64_BPF_REG_S0, + [BPF_REG_7] = SW64_BPF_REG_S1, + [BPF_REG_8] = SW64_BPF_REG_S2, + [BPF_REG_9] = SW64_BPF_REG_S3, /* read-only frame pointer to access stack */ - [BPF_REG_FP] = SW64_BPF_REG_S0, - /* temporary registers for internal BPF JIT */ - [TMP_REG_1] = SW64_BPF_REG_T1, - [TMP_REG_2] = SW64_BPF_REG_T2, + [BPF_REG_FP] = SW64_BPF_REG_FP, /* tail_call_cnt */ - [TCALL_CNT] = SW64_BPF_REG_S5, + [TCALL_CNT] = SW64_BPF_REG_S4, /* temporary register for blinding constants */ - [BPF_REG_AX] = SW64_BPF_REG_T12, + [BPF_REG_AX] = SW64_BPF_REG_T11, }; struct jit_ctx { const struct bpf_prog *prog; int idx; // JITed instruction index + int current_tmp_reg; int epilogue_offset; int *insn_offset; // [bpf_insn_idx] = jited_insn_idx u32 *image; // JITed instruction @@ -83,7 +73,7 @@ static inline u32 sw64_bpf_gen_format_br(int opcode, enum 
sw64_bpf_registers ra, { opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; - return opcode | ra | disp; + return opcode | ra | (disp & 0x1fffff); } static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, @@ -92,7 +82,17 @@ static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; rb = rb << SW64_BPF_RB_OFFSET; - return opcode | ra | rb | disp; + return opcode | ra | rb | (disp & 0xffff); +} + +static inline u32 sw64_bpf_gen_format_ls_func(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, u16 disp, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + function = function << SW64_BPF_LS_FUNC_OFFSET; + return opcode | ra | rb | function | (disp & 0xfff); } static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra, @@ -107,12 +107,12 @@ static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_r } static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra, - enum sw64_bpf_registers rc, u8 imm, int function) + u32 imm, enum sw64_bpf_registers rc, int function) { opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; + imm = (imm & 0xff) << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; - imm = imm << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; return opcode | ra | imm | function | rc; } @@ -125,57 +125,85 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx) ctx->idx++; } -static inline void emit_sw64_ldu64(const int dst, const u64 imm64, struct jit_ctx *ctx) +static inline int get_tmp_reg(struct jit_ctx *ctx) { - u16 imm_tmp; - int reg_tmp = SW64_BPF_REG_T8; - - imm_tmp = (imm64 >> 60) & 0xf; - emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), 
ctx); - emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); - - imm_tmp = (imm64 >> 45) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - - imm_tmp = (imm64 >> 30) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - - imm_tmp = (imm64 >> 15) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + ctx->current_tmp_reg++; + /* Do not use 22-25. Should be more than enough. */ + if (unlikely(ctx->current_tmp_reg == 8)) { + pr_err("eBPF JIT %s[%d]: not enough temporary registers!\n", + current->comm, current->pid); + return -1; + } + return ctx->current_tmp_reg; +} - imm_tmp = imm64 & 0x7fff; - emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); +static inline void put_tmp_reg(struct jit_ctx *ctx) +{ + ctx->current_tmp_reg--; + if (ctx->current_tmp_reg == 21) + ctx->current_tmp_reg = 7; } -static inline void emit_sw64_ldu32(const int dst, const u32 imm32, struct jit_ctx *ctx) +static void emit_sw64_ldu32(const int dst, const u32 imm, struct jit_ctx *ctx) { u16 imm_tmp; - int reg_tmp = SW64_BPF_REG_T8; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm >= U32_MAX - S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + put_tmp_reg(ctx); + return; + } - imm_tmp = (imm32 >> 30) & 3; + imm_tmp = (imm >> 30) & 3; emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); + if (imm_tmp) + 
emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); - imm_tmp = (imm32 >> 15) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } - imm_tmp = imm32 & 0x7fff; - emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + + put_tmp_reg(ctx); } -static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ctx *ctx) +static void emit_sw64_lds32(const int dst, const s32 imm, struct jit_ctx *ctx) { - s16 hi = imm32 >> 16; - s16 lo = imm32 & 0xffff; - int reg_tmp = SW64_BPF_REG_T8; + s16 hi = imm >> 16; + s16 lo = imm & 0xffff; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx); if (lo & 0x8000) { // sign bit is 1 @@ -183,106 +211,299 @@ static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ct emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx); emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - emit(SW64_BPF_LDI(dst, dst, lo), ctx); + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); } else { // sign bit is 0 - emit(SW64_BPF_LDI(dst, dst, lo), ctx); + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); + } + + put_tmp_reg(ctx); +} + +static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx) +{ + u16 imm_tmp; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + 
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= U32_MAX) { + put_tmp_reg(ctx); + return emit_sw64_ldu32(dst, (u32)imm, ctx); + } + + if (imm >= (U64_MAX - S16_MAX) || imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + imm_tmp = (imm >> 60) & 0xf; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + if (imm_tmp) + emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); + + imm_tmp = (imm >> 45) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = (imm >> 30) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); } + + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + + put_tmp_reg(ctx); } -/* dst = ra / rb */ -static void emit_sw64_div(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +/* Do not change!!! 
See arch/sw_64/lib/divide.S for more detail */ +#define REG(x) "$"str(x) +#define str(x) #x +#define DIVIDEND 24 +#define DIVISOR 25 +#define RESULT 27 +/* Make these functions noinline because we need their address at runtime */ +noinline void sw64_bpf_jit_helper_div32(void) { - pr_err("DIV is not supported for now.\n"); + register u32 __dividend asm(REG(DIVIDEND)); + register u32 __divisor asm(REG(DIVISOR)); + u32 res = __dividend / __divisor; + + asm volatile( + "" + :: "r"(res)); } -/* dst = ra % rb */ -static void emit_sw64_mod(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +noinline void sw64_bpf_jit_helper_mod32(void) { - pr_err("MOD is not supported for now.\n"); + register u32 __dividend asm(REG(DIVIDEND)); + register u32 __divisor asm(REG(DIVISOR)); + u32 res = __dividend % __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +noinline void sw64_bpf_jit_helper_div64(void) +{ + register s64 __dividend asm(REG(DIVIDEND)); + register s64 __divisor asm(REG(DIVISOR)); + s64 res = __dividend / __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +noinline void sw64_bpf_jit_helper_mod64(void) +{ + register s64 __dividend asm(REG(DIVIDEND)); + register s64 __divisor asm(REG(DIVISOR)); + s64 res = __dividend % __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx, u8 code) +{ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, dst, DIVIDEND), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, DIVISOR), ctx); + switch (BPF_CLASS(code)) { + case BPF_ALU: + switch (BPF_OP(code)) { + case BPF_DIV: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div32, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod32, ctx); + break; + } + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_ZAP_IMM(RESULT, 0xf0, dst), ctx); + break; + case BPF_ALU64: + switch (BPF_OP(code)) { + case BPF_DIV: + 
emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div64, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod64, ctx); + break; + } + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, RESULT, dst), ctx); + break; + } +} + +#undef REG +#undef str +#undef DIVIDEND +#undef DIVISOR +#undef RESULT + +/* STX XADD: lock *(u32 *)(dst + off) += src */ +static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDW(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDW_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + emit(SW64_BPF_LSTW(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +/* STX XADD: lock *(u64 *)(dst + off) += src */ +static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDL(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDL_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), 
ctx); + emit(SW64_BPF_LSTL(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx) { - int tmp = SW64_BPF_REG_T8; + u8 tmp = get_tmp_reg(ctx); - emit(SW64_BPF_LDI(tmp, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0x2, tmp), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); - emit(SW64_BPF_SRL_REG(tmp, 8, tmp), ctx); - emit(SW64_BPF_SLL_REG(dst, 8, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp, dst), ctx); + emit(SW64_BPF_SRL_IMM(tmp, 8, tmp), ctx); + emit(SW64_BPF_SLL_IMM(dst, 8, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp, dst), ctx); + + put_tmp_reg(ctx); } static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx) { - int tmp1 = SW64_BPF_REG_T8; - int tmp2 = SW64_BPF_REG_T9; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x8, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(dst, 0x6, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x4, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(dst, 0x9, dst), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x8, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x4, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp1), ctx); 
emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 24, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) { - int tmp1 = SW64_BPF_REG_T8; - int tmp2 = SW64_BPF_REG_T9; - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x80, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x81, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 56, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 56, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x40, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x42, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 40, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x4, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x20, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x24, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x8, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x10, 
tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x18, dst), ctx); + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x80, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 56, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x40, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x20, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x10, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x08, tmp1), ctx); emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x04, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x02, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x01, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 56, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void jit_fill_hole(void *area, unsigned int size) @@ -290,107 +511,117 @@ static void jit_fill_hole(void *area, unsigned int size) memset(area, SW64_BPF_ILLEGAL_INSN, size); } +static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) +{ + int from = ctx->insn_offset[bpf_idx]; + int to = ctx->insn_offset[bpf_idx + off]; + + if (ctx->image == NULL) + return 0; + + return to - from; +} + static int offset_to_epilogue(const struct jit_ctx *ctx) { + if (ctx->image == NULL) + return 0; + return ctx->epilogue_offset - ctx->idx; } -/* 
For tail call to jump into */ -#define PROLOGUE_OFFSET 8 +/* For tail call, jump to set up function call stack */ +#define PROLOGUE_OFFSET 11 static void build_prologue(struct jit_ctx *ctx, bool was_classic) { - const int r6 = bpf2sw64[BPF_REG_6]; - const int r7 = bpf2sw64[BPF_REG_7]; - const int r8 = bpf2sw64[BPF_REG_8]; - const int r9 = bpf2sw64[BPF_REG_9]; - const int fp = bpf2sw64[BPF_REG_FP]; - const int tcc = bpf2sw64[TCALL_CNT]; - const int tmp1 = bpf2sw64[TMP_REG_1]; + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; /* Save callee-saved registers */ - emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); - emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 0), ctx); - emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 8), ctx); - emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 16), ctx); - emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 24), ctx); - emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 32), ctx); - emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 40), ctx); - emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); /* Set up BPF prog stack base register */ - emit(SW64_BPF_LDI(fp, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_SP, fp), ctx); if (!was_classic) /* Initialize tail_call_cnt */ - emit(SW64_BPF_LDI(tcc, SW64_BPF_REG_ZR, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, 
SW64_BPF_REG_ZR, tcc), ctx); /* Set up function call stack */ - ctx->stack_size = ctx->prog->aux->stack_depth; - emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); - emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); + ctx->stack_size = (ctx->prog->aux->stack_depth + 15) & (~15); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -ctx->stack_size), ctx); } static void build_epilogue(struct jit_ctx *ctx) { - const int r6 = bpf2sw64[BPF_REG_6]; - const int r7 = bpf2sw64[BPF_REG_7]; - const int r8 = bpf2sw64[BPF_REG_8]; - const int r9 = bpf2sw64[BPF_REG_9]; - const int fp = bpf2sw64[BPF_REG_FP]; - const int tcc = bpf2sw64[TCALL_CNT]; - const int tmp1 = bpf2sw64[TMP_REG_1]; + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; /* Destroy function call stack */ - emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); /* Restore callee-saved registers */ - emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 0), ctx); - emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 8), ctx); - emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 16), ctx); - emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 24), ctx); - emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 32), ctx); - emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 40), ctx); - emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 
48), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 64), ctx); /* Return */ emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); } -static int out_offset = -1; /* initialized on the first pass of build_body() */ static int emit_bpf_tail_call(struct jit_ctx *ctx) { /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ const u8 r3 = bpf2sw64[BPF_REG_3]; /* u64 index */ - const u8 tmp = bpf2sw64[TMP_REG_1]; - const u8 prg = bpf2sw64[TMP_REG_2]; + const u8 tmp = get_tmp_reg(ctx); + const u8 prg = get_tmp_reg(ctx); const u8 tcc = bpf2sw64[TCALL_CNT]; - const int idx0 = ctx->idx; -#define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) u64 offset; + static int out_idx; +#define out_offset (ctx->image ? (out_idx - ctx->idx - 1) : 0) /* if (index >= array->map.max_entries) * goto out; */ offset = offsetof(struct bpf_array, map.max_entries); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ - emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* map.max_entries is u32 */ - emit(SW64_BPF_SUBL_REG(r3, tmp, tmp), ctx); /* tmp = r3 - tmp = index - map.max_entries */ - emit(SW64_BPF_BGE(tmp, jmp_offset), ctx); + emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx); /* map.max_entries is u32 */ + emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ - emit(SW64_BPF_LDI(tmp, SW64_BPF_REG_ZR, MAX_TAIL_CALL_CNT), ctx); - emit(SW64_BPF_SUBL_REG(tcc, tmp, tmp), ctx); - emit(SW64_BPF_BGT(tmp, jmp_offset), ctx); + 
emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(SW64_BPF_CMPULE_REG(tcc, tmp, tmp), ctx); + emit(SW64_BPF_BEQ(tmp, out_offset), ctx); emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); /* prog = array->ptrs[index]; @@ -398,34 +629,33 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * goto out; */ offset = offsetof(struct bpf_array, ptrs); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset of ptrs */ - emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs */ - emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, ptrs is 8 bit aligned */ - emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &prog */ - emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = prog */ - emit(SW64_BPF_BEQ(prg, jmp_offset), ctx); + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs[0] */ + emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, each entry is a pointer */ + emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &ptrs[index] */ + emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = ptrs[index] = prog */ + emit(SW64_BPF_BEQ(prg, out_offset), ctx); /* goto *(prog->bpf_func + prologue_offset); */ offset = offsetof(struct bpf_prog, bpf_func); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ + emit_sw64_ldu64(tmp, offset, ctx); emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ - emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* bpf_func is unsigned int */ + emit(SW64_BPF_LDL(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ + emit(SW64_BPF_BEQ(tmp, out_offset), ctx); emit(SW64_BPF_ADDL_REG(tmp, sizeof(u32) * PROLOGUE_OFFSET, tmp), ctx); emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, ctx->stack_size, SW64_BPF_REG_SP), ctx); - emit(SW64_BPF_BR(tmp, 0), ctx); + emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); /* out */ - if (out_offset == -1) - 
out_offset = cur_offset; - if (cur_offset != out_offset) { - pr_err("tail_call out_offset = %d, expected %d!\n", - cur_offset, out_offset); + if (ctx->image == NULL) + out_idx = ctx->idx; + if (ctx->image != NULL && out_offset <= 0) return -1; - } +#undef out_offset return 0; -#undef cur_offset -#undef jmp_offset } /* JITs an eBPF instruction. @@ -434,61 +664,79 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * >0 - successfully JITed a 16-byte eBPF instruction. * <0 - failed to JIT. */ -static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; const u8 dst = bpf2sw64[insn->dst_reg]; const u8 src = bpf2sw64[insn->src_reg]; - const u8 tmp1 = bpf2sw64[TMP_REG_1]; - const u8 tmp2 = bpf2sw64[TMP_REG_2]; + const u8 tmp1 __maybe_unused = get_tmp_reg(ctx); + const u8 tmp2 __maybe_unused = get_tmp_reg(ctx); const s16 off = insn->off; const s32 imm = insn->imm; - int jmp_offset; + const int bpf_idx = insn - ctx->prog->insnsi; + s32 jmp_offset; u64 func; struct bpf_insn insn1; u64 imm64; switch (code) { case BPF_ALU | BPF_MOV | BPF_X: + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MOV | BPF_X: - emit(SW64_BPF_LDI(dst, src, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); break; case BPF_ALU | BPF_ADD | BPF_X: emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_ADD | BPF_X: emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_SUB | BPF_X: emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_SUB | BPF_X: emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_MUL | BPF_X: emit(SW64_BPF_MULW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case 
BPF_ALU64 | BPF_MUL | BPF_X: emit(SW64_BPF_MULL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_DIV | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU64 | BPF_DIV | BPF_X: - emit_sw64_div(dst, src, dst, ctx); - return -EINVAL; + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU | BPF_MOD | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU64 | BPF_MOD | BPF_X: - emit_sw64_mod(dst, src, dst, ctx); - return -EINVAL; + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU | BPF_LSH | BPF_X: + emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_LSH | BPF_X: emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_RSH | BPF_X: - emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); case BPF_ALU64 | BPF_RSH | BPF_X: emit(SW64_BPF_SRL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ARSH | BPF_X: emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); break; @@ -498,16 +746,18 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: - emit(SW64_BPF_OR_REG(dst, src, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_NEG: + emit(SW64_BPF_SUBW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_NEG: - emit(SW64_BPF_SEXTB_IMM(0xff, tmp1), ctx); - emit(SW64_BPF_XOR_IMM(dst, tmp1, dst), ctx); + emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_ZR, dst, dst), ctx); break; case BPF_ALU | BPF_END | BPF_TO_LE: switch (imm) { @@ 
-519,7 +769,12 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case 64: break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_LE unknown size\n", + current->comm, current->pid); + return -EINVAL; } + break; case BPF_ALU | BPF_END | BPF_TO_BE: switch (imm) { case 16: @@ -531,71 +786,203 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case 64: emit_sw64_htobe64(dst, ctx); break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown size\n", + current->comm, current->pid); + return -EINVAL; } + break; case BPF_ALU | BPF_MOV | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_ldu32(dst, imm, ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MOV | BPF_K: - emit_sw64_lds32(dst, imm, ctx); + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_lds32(dst, imm, ctx); break; case BPF_ALU | BPF_ADD | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_ADDW_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ADD | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_SUB | BPF_K: + if (imm >= -S16_MAX && imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_SUB | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + if (imm >= -S16_MAX 
&& imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_MUL | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MUL | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_DIV | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU64 | BPF_DIV | BPF_K: emit_sw64_lds32(tmp1, imm, ctx); - emit_sw64_div(dst, src, tmp1, ctx); - return -EINVAL; + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU | BPF_MOD | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU64 | BPF_MOD | BPF_K: emit_sw64_lds32(tmp1, imm, ctx); - emit_sw64_mod(dst, src, tmp1, ctx); - return -EINVAL; + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU | BPF_LSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_LSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_RSH | BPF_K: - emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + 
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } + break; case BPF_ALU64 | BPF_RSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_ARSH | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ARSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_AND | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + } + break; case BPF_ALU64 | BPF_AND | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_OR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } + break; case BPF_ALU64 | BPF_OR | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_OR_REG(dst, 
tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_XOR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } + break; case BPF_ALU64 | BPF_XOR | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } break; case BPF_JMP | BPF_JA: - emit(SW64_BPF_BR(SW64_BPF_REG_RA, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_JMP | BPF_JEQ | BPF_X: @@ -645,7 +1032,14 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx); break; } - emit(SW64_BPF_BLBS(tmp1, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp1, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -662,47 +1056,54 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_sw64_lds32(tmp1, imm, ctx); switch (BPF_OP(code)) { case BPF_JEQ: - emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); break; case BPF_JGT: - 
emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp2), ctx); break; case BPF_JLT: - emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp2), ctx); break; case BPF_JGE: - emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp2), ctx); break; case BPF_JLE: - emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp2), ctx); break; case BPF_JNE: - emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); - emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); + emit(SW64_BPF_XOR_IMM(tmp2, 1, tmp2), ctx); break; case BPF_JSGT: - emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp2), ctx); break; case BPF_JSLT: - emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp2), ctx); break; case BPF_JSGE: - emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp2), ctx); break; case BPF_JSLE: - emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp2), ctx); break; case BPF_JSET: - emit(SW64_BPF_AND_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, tmp2), ctx); break; } - emit(SW64_BPF_BLBS(tmp1, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp2, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_JMP | BPF_CALL: func = (u64)__bpf_call_base + imm; - emit_sw64_ldu64(tmp1, func, ctx); - emit(SW64_BPF_CALL(SW64_BPF_REG_RA, tmp1), ctx); + emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); break; case BPF_JMP | BPF_TAIL_CALL: @@ -711,38 +1112,45 @@ static inline int build_insn(const 
struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_JMP | BPF_EXIT: - if (insn - ctx->prog->insnsi + 1 == ctx->prog->len) + // if this is the last instruction, fallthrough to epilogue + if (bpf_idx == ctx->prog->len - 1) break; - jmp_offset = (offset_to_epilogue(ctx) - 1) * 4; - // emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); - // break; - emit_sw64_lds32(tmp1, jmp_offset, ctx); - emit(SW64_BPF_BR(tmp2, 0), ctx); - emit(SW64_BPF_ADDL_REG(tmp1, tmp2, tmp1), ctx); - emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp1), ctx); + jmp_offset = offset_to_epilogue(ctx) - 1; + // epilogue is always at the end, must jump forward + if (jmp_offset >= -1 && jmp_offset <= 0xfffff) { + if (ctx->image && !jmp_offset) + // if this is the last instruction, fallthrough to epilogue + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + else + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_EXIT out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_LD | BPF_IMM | BPF_DW: insn1 = insn[1]; - imm64 = (u64)insn1.imm << 32 | (u32)imm; + imm64 = ((u64)insn1.imm << 32) | (u32)imm; emit_sw64_ldu64(dst, imm64, ctx); - + put_tmp_reg(ctx); + put_tmp_reg(ctx); return 1; /* LDX: dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: emit(SW64_BPF_LDW(dst, src, off), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_LDX | BPF_MEM | BPF_H: emit(SW64_BPF_LDHU(dst, src, off), ctx); - emit(SW64_BPF_SEXTH_REG(dst, dst), ctx); break; case BPF_LDX | BPF_MEM | BPF_B: emit(SW64_BPF_LDBU(dst, src, off), ctx); - emit(SW64_BPF_SEXTB_REG(dst, dst), ctx); break; case BPF_LDX | BPF_MEM | BPF_DW: - emit(SW64_BPF_LDW(dst, src, off), ctx); + emit(SW64_BPF_LDL(dst, src, off), ctx); break; /* ST: *(size *)(dst + off) = imm */ @@ -773,33 +1181,32 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(SW64_BPF_STW(src, dst, 
off), ctx); break; case BPF_STX | BPF_MEM | BPF_H: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STH(src, dst, off), ctx); break; case BPF_STX | BPF_MEM | BPF_B: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STB(src, dst, off), ctx); break; case BPF_STX | BPF_MEM | BPF_DW: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STL(src, dst, off), ctx); break; /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: - emit(SW64_BPF_LDW(tmp1, dst, off), ctx); - emit(SW64_BPF_ADDW_REG(tmp1, src, tmp1), ctx); - emit(SW64_BPF_STW(tmp1, dst, off), ctx); + emit_sw64_xadd32(src, dst, off, ctx); break; /* STX XADD: lock *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - emit(SW64_BPF_LDL(tmp1, dst, off), ctx); - emit(SW64_BPF_ADDL_REG(tmp1, src, tmp1), ctx); - emit(SW64_BPF_STL(tmp1, dst, off), ctx); + emit_sw64_xadd64(src, dst, off, ctx); break; default: - pr_err("unknown opcode %02x\n", code); + pr_err("eBPF JIT %s[%d]: unknown opcode 0x%02x\n", + current->comm, current->pid, code); return -EINVAL; } + put_tmp_reg(ctx); + put_tmp_reg(ctx); return 0; } @@ -813,16 +1220,16 @@ static int build_body(struct jit_ctx *ctx) int ret; ret = build_insn(insn, ctx); - if (ret > 0) { + if (ret < 0) + return ret; + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; + while (ret > 0) { i++; if (ctx->image == NULL) ctx->insn_offset[i] = ctx->idx; - continue; + ret--; } - if (ctx->image == NULL) - ctx->insn_offset[i] = ctx->idx; - if (ret) - return ret; } return 0; @@ -911,7 +1318,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) build_epilogue(&ctx); /* Now we know the actual image size. 
*/ - image_size = sizeof(u32) * ctx.idx; + /* And we need extra 8 bytes for lock instructions alignment */ + image_size = sizeof(u32) * ctx.idx + 8; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -921,7 +1329,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. Now, the actual pass. */ - ctx.image = (u32 *)image_ptr; + /* lock instructions need 8-byte alignment */ + ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7)); skip_init_ctx: ctx.idx = 0; @@ -958,6 +1367,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->bpf_func = (void *)ctx.image; prog->jited = 1; prog->jited_len = image_size; + if (ctx.current_tmp_reg) { + pr_err("eBPF JIT %s[%d]: unreleased temporary regsters %d\n", + current->comm, current->pid, ctx.current_tmp_reg); + } if (!prog->is_func || extra_pass) { out_off: -- Gitee From 02da9e00679727f2984bbcca9210fde0ee3697c5 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 30 Aug 2022 13:58:58 +0800 Subject: [PATCH 46/77] sw64: bpf: fix ebpf jit compiler Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNGJ -------------------------------- This patch makes following changes to ebpf jit compiler: * switch to unsigned 64-bit div and mod to avoid incorrect overflow result * fix calling other bpf programs directly * fix tail call * fix jit_fill_hole() * change ILLEGAL_INSN so it can be used in the future Results of "test_verifier" in jited and emulated mode are now same. Extra space in jited image is now filled with illegal instructions correctly. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit.h | 2 +- arch/sw_64/net/bpf_jit_comp.c | 35 +++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h index e4c96995bd96..2cf5ba5253a8 100644 --- a/arch/sw_64/net/bpf_jit.h +++ b/arch/sw_64/net/bpf_jit.h @@ -96,7 +96,7 @@ #define SW64_BPF_FUNC_ALU_SEXTH 0x6B /* special instuction used in jit_fill_hole() */ -#define SW64_BPF_ILLEGAL_INSN (0x1bff1000) /* rd_f $31 */ +#define SW64_BPF_ILLEGAL_INSN (0x1ff00000) /* pri_ret/b $31 */ enum sw64_bpf_registers { SW64_BPF_REG_V0 = 0, /* keep return value */ diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index f1e471a0789b..98ddb60200c8 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -307,9 +307,9 @@ noinline void sw64_bpf_jit_helper_mod32(void) noinline void sw64_bpf_jit_helper_div64(void) { - register s64 __dividend asm(REG(DIVIDEND)); - register s64 __divisor asm(REG(DIVISOR)); - s64 res = __dividend / __divisor; + register u64 __dividend asm(REG(DIVIDEND)); + register u64 __divisor asm(REG(DIVISOR)); + u64 res = __dividend / __divisor; asm volatile( "" @@ -318,9 +318,9 @@ noinline void sw64_bpf_jit_helper_div64(void) noinline void sw64_bpf_jit_helper_mod64(void) { - register s64 __dividend asm(REG(DIVIDEND)); - register s64 __divisor asm(REG(DIVISOR)); - s64 res = __dividend % __divisor; + register u64 __dividend asm(REG(DIVIDEND)); + register u64 __divisor asm(REG(DIVISOR)); + u64 res = __dividend % __divisor; asm volatile( "" @@ -508,7 +508,10 @@ static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) static void jit_fill_hole(void *area, unsigned int size) { - memset(area, SW64_BPF_ILLEGAL_INSN, size); + unsigned long c = SW64_BPF_ILLEGAL_INSN; + + c |= c << 32; + __constant_c_memset(area, c, size); } static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) @@ 
-593,9 +596,9 @@ static void build_epilogue(struct jit_ctx *ctx) static int emit_bpf_tail_call(struct jit_ctx *ctx) { - /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + /* bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) */ const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ - const u8 r3 = bpf2sw64[BPF_REG_3]; /* u64 index */ + const u8 r3 = bpf2sw64[BPF_REG_3]; /* u32 index */ const u8 tmp = get_tmp_reg(ctx); const u8 prg = get_tmp_reg(ctx); @@ -612,6 +615,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx); /* map.max_entries is u32 */ + emit(SW64_BPF_ZAP_IMM(r3, 0xf0, r3), ctx); /* index is u32 */ emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx); emit(SW64_BPF_BNE(tmp, out_offset), ctx); @@ -620,8 +624,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * tail_call_cnt++; */ emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx); - emit(SW64_BPF_CMPULE_REG(tcc, tmp, tmp), ctx); - emit(SW64_BPF_BEQ(tmp, out_offset), ctx); + emit(SW64_BPF_CMPULT_REG(tmp, tcc, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); /* prog = array->ptrs[index]; @@ -642,8 +646,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ emit(SW64_BPF_LDL(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ emit(SW64_BPF_BEQ(tmp, out_offset), ctx); - emit(SW64_BPF_ADDL_REG(tmp, sizeof(u32) * PROLOGUE_OFFSET, tmp), ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, ctx->stack_size, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_LDI(tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx); put_tmp_reg(ctx); @@ -652,7 +656,7 
@@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* out */ if (ctx->image == NULL) out_idx = ctx->idx; - if (ctx->image != NULL && out_offset <= 0) + if (ctx->image != NULL && out_idx <= 0) return -1; #undef out_offset return 0; @@ -1102,6 +1106,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_JMP | BPF_CALL: func = (u64)__bpf_call_base + imm; + if ((func & 0xffffffffe0000000UL) != 0xffffffff80000000UL) + /* calling bpf program, switch to vmalloc addr */ + func = (func & 0xffffffff) | 0xfffff00000000000UL; emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx); emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); break; -- Gitee From 9b3064beb18c536b28ea7ea2cd5b9c3dc9180edc Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Wed, 31 Aug 2022 15:47:15 +0800 Subject: [PATCH 47/77] sw64: bpf: fix insn_offset Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNHA -------------------------------- Since ctx->idx is the index of the next jited instruction, value of insn_offset should be set before build_insn(). Allocate 1 more entry for insn_offset[], and give epilogue_offset to it, so the correct jump offset can be calculated if the last instruction is BPF_JMP. 
Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit_comp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index 98ddb60200c8..10fc58eb4d37 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -516,8 +516,8 @@ static void jit_fill_hole(void *area, unsigned int size) static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) { - int from = ctx->insn_offset[bpf_idx]; - int to = ctx->insn_offset[bpf_idx + off]; + int from = ctx->insn_offset[bpf_idx + 1]; + int to = ctx->insn_offset[bpf_idx + 1 + off]; if (ctx->image == NULL) return 0; @@ -1226,15 +1226,15 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; ret = build_insn(insn, ctx); if (ret < 0) return ret; - if (ctx->image == NULL) - ctx->insn_offset[i] = ctx->idx; while (ret > 0) { i++; if (ctx->image == NULL) - ctx->insn_offset[i] = ctx->idx; + ctx->insn_offset[i] = ctx->insn_offset[i - 1]; ret--; } } @@ -1305,7 +1305,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.insn_offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + ctx.insn_offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.insn_offset == NULL) { prog = orig_prog; goto out_off; @@ -1321,7 +1321,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - ctx.epilogue_offset = ctx.idx; + ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); /* Now we know the actual image size. 
*/ -- Gitee From 2973c579acc563e8c95fd8ca09ee0d1af4ea1696 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Wed, 31 Aug 2022 17:06:56 +0800 Subject: [PATCH 48/77] sw64: bpf: fix 32-bit bitwise operation Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNI5 -------------------------------- Make sure the result of 32-bit bitwise operation is zero extended to 64 bits. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit_comp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index 10fc58eb4d37..5e3d8d5327d3 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -745,14 +745,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_AND | BPF_X: + emit(SW64_BPF_AND_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_AND | BPF_X: emit(SW64_BPF_AND_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_OR | BPF_X: + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_OR | BPF_X: emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_XOR | BPF_X: + emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_XOR | BPF_X: emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); break; @@ -936,6 +945,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_sw64_ldu32(tmp1, imm, ctx); emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_AND | BPF_K: if (imm >= 0 && imm <= U8_MAX) { @@ -952,6 +962,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_sw64_ldu32(tmp1, imm, ctx); emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); } + 
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_OR | BPF_K: if (imm >= 0 && imm <= U8_MAX) { @@ -968,6 +979,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_sw64_ldu32(tmp1, imm, ctx); emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_XOR | BPF_K: if (imm >= 0 && imm <= U8_MAX) { -- Gitee From e5d02961bb2202a810731fe2de1901f56976b3b4 Mon Sep 17 00:00:00 2001 From: Du Yilong Date: Fri, 2 Sep 2022 09:00:44 +0800 Subject: [PATCH 49/77] sw64: kvm: turn off the clock timer of guest os Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNJ3 -------------------------------- During guest os reboot test, an error occurred by chance and the hypervisor reports guest unknown hardware error. The interrupt of guest os cannot be handled because the interrupt route table of guest is empty during reboot time if interrupt are enabled by mistake. The problem of guest os reboot failures are significantly reduced by turning off the clock timer. 
Signed-off-by: Du Yilong Signed-off-by: Gu Zitao --- arch/sw_64/kvm/kvm-sw64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 06e969caaaa6..825fe39f0494 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -411,6 +411,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long addr = vcpu->kvm->arch.host_phys_addr; + hrtimer_cancel(&vcpu->arch.hrt); vcpu->arch.vcb.whami = vcpu->vcpu_id; vcpu->arch.vcb.vcpu_irq_disabled = 1; vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ -- Gitee From 7f897eb7ae511b2db56061c04b917e4b6654083b Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 5 Sep 2022 14:39:01 +0800 Subject: [PATCH 50/77] sw64: bpf: add BPF_JMP32 and BPF_PROBE_MEM Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PNGJ -------------------------------- Add BPF_JMP32 and BPF_PROBE_MEM instructions support. Signed-off-by: Mao Minkai Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit_comp.c | 119 +++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 15 deletions(-) diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index 5e3d8d5327d3..2c238c33e574 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -59,6 +59,7 @@ struct jit_ctx { int current_tmp_reg; int epilogue_offset; int *insn_offset; // [bpf_insn_idx] = jited_insn_idx + int exentry_idx; u32 *image; // JITed instruction u32 stack_size; }; @@ -514,6 +515,7 @@ static void jit_fill_hole(void *area, unsigned int size) __constant_c_memset(area, c, size); } +static int offset_to_epilogue(const struct jit_ctx *ctx); static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) { int from = ctx->insn_offset[bpf_idx + 1]; @@ -662,6 +664,39 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) return 0; } +/* For accesses to BTF pointers, add an entry to the exception table */ +static int 
add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + + if (!ctx->image) + /* First pass */ + return 0; + + if (!ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->exentry_idx]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = (long)&ex->insn - pc; + ex->insn = offset; + + ex->fixup.bits.nextinsn = sizeof(u32); + ex->fixup.bits.valreg = dst_reg; + ex->fixup.bits.errreg = SW64_BPF_REG_ZR; + + ctx->exentry_idx++; + return 0; +} + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. @@ -671,8 +706,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; - const u8 dst = bpf2sw64[insn->dst_reg]; - const u8 src = bpf2sw64[insn->src_reg]; + u8 dst = bpf2sw64[insn->dst_reg]; + u8 src = bpf2sw64[insn->src_reg]; const u8 tmp1 __maybe_unused = get_tmp_reg(ctx); const u8 tmp2 __maybe_unused = get_tmp_reg(ctx); const s16 off = insn->off; @@ -682,6 +717,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u64 func; struct bpf_insn insn1; u64 imm64; + int ret; switch (code) { case BPF_ALU | BPF_MOV | BPF_X: @@ -1001,6 +1037,21 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) } break; + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + 
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, src, tmp1), ctx); + src = tmp1; + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JLT | BPF_X: @@ -1058,6 +1109,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) } break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JLT | BPF_K: @@ -1159,17 +1223,32 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* LDX: dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - emit(SW64_BPF_LDW(dst, src, off), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); - break; case BPF_LDX | BPF_MEM | BPF_H: - emit(SW64_BPF_LDHU(dst, src, off), ctx); - break; case BPF_LDX | BPF_MEM | BPF_B: - emit(SW64_BPF_LDBU(dst, src, off), ctx); - break; case BPF_LDX | BPF_MEM | BPF_DW: - emit(SW64_BPF_LDL(dst, src, off), ctx); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + switch (BPF_SIZE(code)) { + case BPF_W: + emit(SW64_BPF_LDW(dst, src, off), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_H: + emit(SW64_BPF_LDHU(dst, src, off), ctx); + break; + case BPF_B: + emit(SW64_BPF_LDBU(dst, src, off), ctx); + break; + case BPF_DW: + emit(SW64_BPF_LDL(dst, src, off), ctx); + break; + } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return 
ret; break; /* ST: *(size *)(dst + off) = imm */ @@ -1263,6 +1342,9 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) + return -1; + return 0; } @@ -1280,7 +1362,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - int image_size; + int image_size, prog_size, extable_size; u8 *image_ptr; if (!prog->jit_requested) @@ -1311,7 +1393,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); @@ -1336,9 +1418,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + /* Now we know the actual image size. */ /* And we need extra 8 bytes for lock instructions alignment */ - image_size = sizeof(u32) * ctx.idx + 8; + prog_size = sizeof(u32) * ctx.idx + 8; + image_size = prog_size + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1350,8 +1436,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* lock instructions need 8-byte alignment */ ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7)); + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; + ctx.exentry_idx = 0; build_prologue(&ctx, was_classic); @@ -1372,7 +1461,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* And we're done. 
*/ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); bpf_flush_icache(header, ctx.image + ctx.idx); @@ -1385,7 +1474,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } prog->bpf_func = (void *)ctx.image; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; if (ctx.current_tmp_reg) { pr_err("eBPF JIT %s[%d]: unreleased temporary regsters %d\n", current->comm, current->pid, ctx.current_tmp_reg); -- Gitee From 36789fd440fc7251a1fd851acb63cc3f3bb33cce Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 6 Sep 2022 16:39:04 +0800 Subject: [PATCH 51/77] sw64: fix assembly style Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTGY -------------------------------- Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/entry-ftrace.S | 2 +- arch/sw_64/kernel/entry.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S index 3f88a9fe2e3e..223dd5fc0808 100644 --- a/arch/sw_64/kernel/entry-ftrace.S +++ b/arch/sw_64/kernel/entry-ftrace.S @@ -79,7 +79,7 @@ ftrace_graph_call: /* ftrace_graph_caller(); */ /* "br ftrace_graph_caller" */ #endif mcount_end - ret $31, ($28), 1 + ret $31, ($28), 1 .end ftrace_caller #else /* !CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index 67bafd4a930a..cfb8e7c6d7fb 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -403,7 +403,7 @@ __switch_to: stl $15, TASK_THREAD_S6($16) /* Restore context from next->thread */ ldl $26, TASK_THREAD_RA($17) - ldl $30, TASK_THREAD_SP($17) + ldl $30, TASK_THREAD_SP($17) ldl $9, TASK_THREAD_S0($17) ldl $10, TASK_THREAD_S1($17) ldl $11, TASK_THREAD_S2($17) -- Gitee From b7e7080b5c847ed3381bccb59ba95e8ca1a6c5f4 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 6 Sep 
2022 16:39:04 +0800 Subject: [PATCH 52/77] sw64: remove useless local r26 in setup_rt_frame() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTGY -------------------------------- Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/signal.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index 32c9484d2aa2..b80cf0e56224 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -261,7 +261,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - unsigned long oldsp, r26, err = 0; + unsigned long oldsp, err = 0; struct rt_sigframe __user *frame; oldsp = rdusp(); @@ -283,13 +283,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) if (err) return -EFAULT; - /* Set up to return from userspace. If provided, use a stub - * already in userspace. 
- */ - r26 = VDSO_SYMBOL(current->mm->context.vdso, rt_sigreturn); - /* "Return" to the handler */ - regs->r26 = r26; + regs->r26 = VDSO_SYMBOL(current->mm->context.vdso, rt_sigreturn); regs->r27 = regs->pc = (unsigned long) ksig->ka.sa.sa_handler; regs->r16 = ksig->sig; /* a0: signal number */ if (ksig->ka.sa.sa_flags & SA_SIGINFO) { -- Gitee From 2f56fc13b4519c721686da339f5c956eda8c79d1 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 6 Sep 2022 16:39:04 +0800 Subject: [PATCH 53/77] sw64: clear .bss section using memset() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTGY -------------------------------- Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/head.S | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S index 3dfb95c91d70..7cce2a8859e5 100644 --- a/arch/sw_64/kernel/head.S +++ b/arch/sw_64/kernel/head.S @@ -26,23 +26,11 @@ __start: /* ... and find our stack ... */ ldi $30, ASM_THREAD_SIZE($8) /* ... and then we can clear bss data. 
*/ - ldi $2, __bss_start - ldi $3, __bss_stop - /* 8 bytes alignment */ -1: and $2, 0x7, $1 # align check - bne $1, 3f -2: subl $3, $2, $1 # align clear - ble $1, 4f - subl $1, 0x8, $1 - ble $1, 3f - stl $31, 0($2) - addl $2, 8, $2 - br $31, 2b -3: stb $31, 0($2) # non align clear - addl $2, 1, $2 - subl $3, $2, $1 - bgt $1, 1b -4:# finish clear + ldi $16, __bss_start + ldi $18, __bss_stop + subl $18, $16, $18 + mov $31, $17 + call $26, __constant_c_memset #ifdef CONFIG_RELOCATABLE ldi $30, -8($30) stl $29, 0($30) -- Gitee From 73e6b8e18418bdab52366cdf8444386eb3c2676d Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 6 Sep 2022 16:39:04 +0800 Subject: [PATCH 54/77] sw64: sort Kconfig select Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTGY -------------------------------- Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 136 ++++++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 36cddefb1ad4..7174b6218539 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -2,40 +2,13 @@ config SW64 bool default y - select AUDIT_ARCH - select HAVE_IDE - select HAVE_OPROFILE - select HAVE_PCSPKR_PLATFORM - select HAVE_PERF_EVENTS - select HAVE_FAST_GUP - select GENERIC_CLOCKEVENTS - select GENERIC_IRQ_PROBE - select GENERIC_IRQ_LEGACY - select GENERIC_IRQ_SHOW - select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_NO_PREEMPT - select ARCH_USE_CMPXCHG_LOCKREF - select GENERIC_SMP_IDLE_THREAD - select HAVE_MOD_ARCH_SPECIFIC - select MODULES_USE_ELF_RELA - select ARCH_SUPPORTS_NUMA_BALANCING - select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_SECCOMP_FILTER - select OLD_SIGSUSPEND - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_ARCH_KGDB + select ACPI + select ACPI_REDUCED_HARDWARE_ONLY 
+ select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_PHYS_TO_DMA - select SWIOTLB - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP - select NO_BOOTMEM - select ARCH_USE_QUEUED_RWLOCKS - select ARCH_USE_QUEUED_SPINLOCKS - select COMMON_CLK - select HANDLE_DOMAIN_IRQ + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SG_CHAIN + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH select ARCH_INLINE_READ_LOCK_IRQ @@ -44,60 +17,87 @@ config SW64 select ARCH_INLINE_READ_UNLOCK_BH select ARCH_INLINE_READ_UNLOCK_IRQ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE - select ARCH_INLINE_WRITE_LOCK - select ARCH_INLINE_WRITE_LOCK_BH - select ARCH_INLINE_WRITE_LOCK_IRQ - select ARCH_INLINE_WRITE_LOCK_IRQSAVE - select ARCH_INLINE_WRITE_UNLOCK - select ARCH_INLINE_WRITE_UNLOCK_BH - select ARCH_INLINE_WRITE_UNLOCK_IRQ - select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE - select ARCH_INLINE_SPIN_TRYLOCK - select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK select ARCH_INLINE_SPIN_LOCK_BH select ARCH_INLINE_SPIN_LOCK_IRQ select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_UNLOCK select ARCH_INLINE_SPIN_UNLOCK_BH select ARCH_INLINE_SPIN_UNLOCK_IRQ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_NO_PREEMPT + select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_HAS_SG_CHAIN - select IRQ_FORCED_THREADING + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_UPROBES + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_IPC_PARSE_VERSION + select 
AUDIT_ARCH + select COMMON_CLK + select DMA_OPS if PCI + select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_LEGACY select GENERIC_IRQ_MIGRATION if SMP + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP if PCI + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL + select HANDLE_DOMAIN_IRQ + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_FUNCTION_TRACER + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ASM_MODVERSIONS + select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE + select HAVE_EBPF_JIT + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_C_RECORDMCOUNT select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_IDE select HAVE_KPROBES select HAVE_KRETPROBES - select HAVE_SYSCALL_TRACEPOINTS - select ARCH_SUPPORTS_UPROBES - select OF_EARLY_FLATTREE if OF - select HAVE_EBPF_JIT - select SPARSEMEM_EXTREME if SPARSEMEM - select HAVE_ARCH_JUMP_LABEL - select ARCH_WANT_FRAME_POINTERS - select HAVE_ASM_MODVERSIONS - select ARCH_HAS_ELF_RANDOMIZE - select HAVE_PERF_USER_STACK_DUMP - select HAVE_PERF_REGS - select ARCH_SUPPORTS_ACPI - select ACPI - select ACPI_REDUCED_HARDWARE_ONLY - select GENERIC_TIME_VSYSCALL - select SET_FS + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_OPROFILE select HAVE_PCI - select GENERIC_PCI_IOMAP if PCI - select PCI_MSI_ARCH_FALLBACKS - select DMA_OPS if PCI + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API - select ARCH_HAS_PTE_SPECIAL - select HARDIRQS_SW_RESEND + select HAVE_SYSCALL_TRACEPOINTS + select IRQ_FORCED_THREADING select MEMORY_HOTPLUG_SPARSE if MEMORY_HOTPLUG + select MODULES_USE_ELF_RELA + 
select NO_BOOTMEM + select OF_EARLY_FLATTREE if OF + select OLD_SIGSUSPEND + select PCI_MSI_ARCH_FALLBACKS + select SET_FS + select SPARSEMEM_EXTREME if SPARSEMEM + select SWIOTLB config LOCKDEP_SUPPORT def_bool y -- Gitee From 6768c8efd31a8e1888fccf755269a8c1c5ceabcb Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Tue, 6 Sep 2022 16:39:05 +0800 Subject: [PATCH 55/77] sw64: enable DEBUG_BUGVERBOSE by default Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTGY -------------------------------- Enable DEBUG_BUGVERBOSE by default to make debug easier. Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 7174b6218539..17b48c7e5fc1 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -68,6 +68,7 @@ config SW64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_BUGVERBOSE select HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT select HAVE_FAST_GUP -- Gitee From d95edbdd742c2f07478cd041489f69f217b70cbf Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Wed, 7 Sep 2022 15:20:01 +0800 Subject: [PATCH 56/77] sw64: fix incorrect white space use in macros Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/chip/chip3/cpufreq_debugfs.c | 18 +- arch/sw_64/include/asm/atomic.h | 2 +- arch/sw_64/include/asm/barrier.h | 2 +- arch/sw_64/include/asm/clock.h | 2 +- arch/sw_64/include/asm/dma-mapping.h | 2 +- arch/sw_64/include/asm/efi.h | 2 +- arch/sw_64/include/asm/hcall.h | 2 +- arch/sw_64/include/asm/insn.h | 8 +- arch/sw_64/include/asm/kprobes.h | 2 +- arch/sw_64/include/asm/kvm_host.h | 2 +- arch/sw_64/include/asm/kvm_mmio.h | 2 +- arch/sw_64/include/asm/msi.h | 6 +- 
arch/sw_64/include/asm/numa.h | 4 +- arch/sw_64/include/asm/pgtable-4level.h | 2 +- arch/sw_64/include/asm/pgtable.h | 2 +- arch/sw_64/include/asm/platform.h | 2 +- arch/sw_64/include/asm/stacktrace.h | 2 +- arch/sw_64/include/asm/sw64_init.h | 2 +- arch/sw_64/include/asm/syscall.h | 2 +- arch/sw_64/include/asm/termios.h | 2 +- arch/sw_64/include/asm/vcpu.h | 6 +- arch/sw_64/include/uapi/asm/errno.h | 220 ++++++++++++------------ arch/sw_64/include/uapi/asm/ioctls.h | 4 +- arch/sw_64/include/uapi/asm/kvm.h | 2 +- arch/sw_64/include/uapi/asm/regdef.h | 16 +- arch/sw_64/include/uapi/asm/termbits.h | 128 +++++++------- arch/sw_64/kernel/acpi.c | 4 +- arch/sw_64/kernel/pci_impl.h | 2 +- arch/sw_64/kernel/setup.c | 2 +- arch/sw_64/kernel/time.c | 2 +- arch/sw_64/math-emu/math.c | 10 +- 31 files changed, 232 insertions(+), 232 deletions(-) diff --git a/arch/sw_64/chip/chip3/cpufreq_debugfs.c b/arch/sw_64/chip/chip3/cpufreq_debugfs.c index 13696360ef02..c58f1cee3907 100644 --- a/arch/sw_64/chip/chip3/cpufreq_debugfs.c +++ b/arch/sw_64/chip/chip3/cpufreq_debugfs.c @@ -7,15 +7,15 @@ #include #include -#define CLK_PRT 0x1UL -#define CORE_CLK0_V (0x1UL << 1) -#define CORE_CLK0_R (0x1UL << 2) -#define CORE_CLK2_V (0x1UL << 15) -#define CORE_CLK2_R (0x1UL << 16) - -#define CLK_LV1_SEL_PRT 0x1UL -#define CLK_LV1_SEL_MUXA (0x1UL << 2) -#define CLK_LV1_SEL_MUXB (0x1UL << 3) +#define CLK_PRT 0x1UL +#define CORE_CLK0_V (0x1UL << 1) +#define CORE_CLK0_R (0x1UL << 2) +#define CORE_CLK2_V (0x1UL << 15) +#define CORE_CLK2_R (0x1UL << 16) + +#define CLK_LV1_SEL_PRT 0x1UL +#define CLK_LV1_SEL_MUXA (0x1UL << 2) +#define CLK_LV1_SEL_MUXB (0x1UL << 3) #define CORE_PLL0_CFG_SHIFT 4 #define CORE_PLL2_CFG_SHIFT 18 diff --git a/arch/sw_64/include/asm/atomic.h b/arch/sw_64/include/asm/atomic.h index 126417a1aeee..66f4437be103 100644 --- a/arch/sw_64/include/asm/atomic.h +++ b/arch/sw_64/include/asm/atomic.h @@ -336,7 +336,7 @@ ATOMIC_OPS(sub) #define atomic64_fetch_add_relaxed 
atomic64_fetch_add_relaxed #define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed -#undef ATOMIC_OPS +#undef ATOMIC_OPS #define ATOMIC_OPS(op, asm) \ ATOMIC_OP(op, asm) \ diff --git a/arch/sw_64/include/asm/barrier.h b/arch/sw_64/include/asm/barrier.h index c691038919cd..5f4a03d700c6 100644 --- a/arch/sw_64/include/asm/barrier.h +++ b/arch/sw_64/include/asm/barrier.h @@ -21,4 +21,4 @@ #include -#endif /* _ASM_SW64_BARRIER_H */ +#endif /* _ASM_SW64_BARRIER_H */ diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h index 88714eb08507..af6872ed9edb 100644 --- a/arch/sw_64/include/asm/clock.h +++ b/arch/sw_64/include/asm/clock.h @@ -53,4 +53,4 @@ void sw64_update_clockevents(unsigned long cpu, u32 freq); void sw64_store_policy(struct cpufreq_policy *policy); unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy); -#endif /* _ASM_SW64_CLOCK_H */ +#endif /* _ASM_SW64_CLOCK_H */ diff --git a/arch/sw_64/include/asm/dma-mapping.h b/arch/sw_64/include/asm/dma-mapping.h index bb84690eabfe..37fce35b09d5 100644 --- a/arch/sw_64/include/asm/dma-mapping.h +++ b/arch/sw_64/include/asm/dma-mapping.h @@ -9,4 +9,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -#endif /* _ASM_SW64_DMA_MAPPING_H */ +#endif /* _ASM_SW64_DMA_MAPPING_H */ diff --git a/arch/sw_64/include/asm/efi.h b/arch/sw_64/include/asm/efi.h index 2bc863e3b836..f061cae2fc8a 100644 --- a/arch/sw_64/include/asm/efi.h +++ b/arch/sw_64/include/asm/efi.h @@ -21,7 +21,7 @@ extern void efi_init(void); __f(args); \ }) -#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000001 +#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000001 /* arch specific definitions used by the stub code */ diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h index b5438b477c87..65669e54c0f8 100644 --- a/arch/sw_64/include/asm/hcall.h +++ b/arch/sw_64/include/asm/hcall.h @@ -39,4 +39,4 @@ static inline unsigned long hcall(unsigned long hcall, unsigned long arg0, 
return __r0; } -#endif /* _ASM_SW64_HCALL_H */ +#endif /* _ASM_SW64_HCALL_H */ diff --git a/arch/sw_64/include/asm/insn.h b/arch/sw_64/include/asm/insn.h index 54a9a2026784..ec0efae3aed0 100644 --- a/arch/sw_64/include/asm/insn.h +++ b/arch/sw_64/include/asm/insn.h @@ -14,8 +14,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef _ASM_SW64_INSN_H -#define _ASM_SW64_INSN_H +#ifndef _ASM_SW64_INSN_H +#define _ASM_SW64_INSN_H #include /* Register numbers */ @@ -28,7 +28,7 @@ enum { #define BR_MAX_DISP 0xfffff /* SW64 instructions are always 32 bits. */ -#define SW64_INSN_SIZE 4 +#define SW64_INSN_SIZE 4 #define ___SW64_RA(a) (((a) & 0x1f) << 21) #define ___SW64_RB(b) (((b) & 0x1f) << 16) @@ -93,4 +93,4 @@ SW64_INSN(fbge, 0xf4000000, 0xfc000000); SW64_INSN(lldw, 0x20000000, 0xfc00f000); SW64_INSN(lldl, 0x20001000, 0xfc00f000); -#endif /* _ASM_SW64_INSN_H */ +#endif /* _ASM_SW64_INSN_H */ diff --git a/arch/sw_64/include/asm/kprobes.h b/arch/sw_64/include/asm/kprobes.h index c19b961a19da..6b7e4548a8bd 100644 --- a/arch/sw_64/include/asm/kprobes.h +++ b/arch/sw_64/include/asm/kprobes.h @@ -19,7 +19,7 @@ #include #include -#define __ARCH_WANT_KPROBES_INSN_SLOT +#define __ARCH_WANT_KPROBES_INSN_SLOT struct kprobe; struct pt_regs; diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 6d292c086347..835ccef7490f 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -124,4 +124,4 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} -#endif /* _ASM_SW64_KVM_HOST_H */ +#endif /* _ASM_SW64_KVM_HOST_H */ diff --git a/arch/sw_64/include/asm/kvm_mmio.h b/arch/sw_64/include/asm/kvm_mmio.h index 9ba31c91902f..c87b259e9395 100644 --- a/arch/sw_64/include/asm/kvm_mmio.h +++ 
b/arch/sw_64/include/asm/kvm_mmio.h @@ -14,4 +14,4 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, struct hcall_args *hargs); -#endif /* _ASM_SW64_KVM_MMIO_H */ +#endif /* _ASM_SW64_KVM_MMIO_H */ diff --git a/arch/sw_64/include/asm/msi.h b/arch/sw_64/include/asm/msi.h index 079fac0d128e..ca5850eb5957 100644 --- a/arch/sw_64/include/asm/msi.h +++ b/arch/sw_64/include/asm/msi.h @@ -42,6 +42,6 @@ struct irq_alloc_info { irq_hw_number_t hwirq; }; typedef struct irq_alloc_info msi_alloc_info_t; -#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ -#endif /* CONFIG_PCI_MSI */ -#endif /* _ASM_SW64_MSI_H */ +#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_PCI_MSI */ +#endif /* _ASM_SW64_MSI_H */ diff --git a/arch/sw_64/include/asm/numa.h b/arch/sw_64/include/asm/numa.h index 4ea8b8de248a..930be7831813 100644 --- a/arch/sw_64/include/asm/numa.h +++ b/arch/sw_64/include/asm/numa.h @@ -30,6 +30,6 @@ static inline void set_cpuid_to_node(int cpuid, s16 node) __cpuid_to_node[cpuid] = node; } -#endif /* CONFIG_NUMA */ +#endif /* CONFIG_NUMA */ -#endif /* _ASM_SW64_NUMA_H */ +#endif /* _ASM_SW64_NUMA_H */ diff --git a/arch/sw_64/include/asm/pgtable-4level.h b/arch/sw_64/include/asm/pgtable-4level.h index 8c45f441c520..719e2c5377e3 100644 --- a/arch/sw_64/include/asm/pgtable-4level.h +++ b/arch/sw_64/include/asm/pgtable-4level.h @@ -2,7 +2,7 @@ #ifndef _ASM_SW64_PGTABLE_4LEVEL_H #define _ASM_SW64_PGTABLE_4LEVEL_H -#ifdef __KERNEL__ +#ifdef __KERNEL__ #ifndef __ASSEMBLY__ /* * These are used to make use of C type-checking.. 
diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h index 76c782baf242..b451bc94e737 100644 --- a/arch/sw_64/include/asm/pgtable.h +++ b/arch/sw_64/include/asm/pgtable.h @@ -544,7 +544,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #define mk_pmd(page, prot) pfn_pmd(page_to_pfn(page), (prot)) -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); diff --git a/arch/sw_64/include/asm/platform.h b/arch/sw_64/include/asm/platform.h index 318b6ca732cd..59418bba9de7 100644 --- a/arch/sw_64/include/asm/platform.h +++ b/arch/sw_64/include/asm/platform.h @@ -14,4 +14,4 @@ extern struct sw64_platform_ops *sw64_platform; extern struct sw64_platform_ops xuelang_ops; -#endif /* _ASM_SW64_PLATFORM_H */ +#endif /* _ASM_SW64_PLATFORM_H */ diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h index ed691a72573b..958c9892fd6d 100644 --- a/arch/sw_64/include/asm/stacktrace.h +++ b/arch/sw_64/include/asm/stacktrace.h @@ -69,4 +69,4 @@ static inline bool on_accessible_stack(struct task_struct *tsk, return false; } -#endif /* _ASM_SW64_STACKTRACE_H */ +#endif /* _ASM_SW64_STACKTRACE_H */ diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h index 2d9140605d0b..9f16bdf2a61a 100644 --- a/arch/sw_64/include/asm/sw64_init.h +++ b/arch/sw_64/include/asm/sw64_init.h @@ -43,4 +43,4 @@ extern struct sw64_chip_init_ops *sw64_chip_init; DECLARE_PER_CPU(unsigned long, hard_node_id); -#endif /* _ASM_SW64_INIT_H */ +#endif /* _ASM_SW64_INIT_H */ diff --git a/arch/sw_64/include/asm/syscall.h b/arch/sw_64/include/asm/syscall.h index 4b784c3d846b..0b1556a460b4 100644 --- a/arch/sw_64/include/asm/syscall.h +++ b/arch/sw_64/include/asm/syscall.h @@ -72,4 +72,4 @@ static inline int syscall_get_arch(struct task_struct *task) return 
AUDIT_ARCH_SW64; } -#endif /* _ASM_SW64_SYSCALL_H */ +#endif /* _ASM_SW64_SYSCALL_H */ diff --git a/arch/sw_64/include/asm/termios.h b/arch/sw_64/include/asm/termios.h index ef509946675a..9849dd9b58bf 100644 --- a/arch/sw_64/include/asm/termios.h +++ b/arch/sw_64/include/asm/termios.h @@ -78,4 +78,4 @@ #define kernel_termios_to_user_termios(u, k) \ copy_to_user(u, k, sizeof(struct termios)) -#endif /* _ASM_SW64_TERMIOS_H */ +#endif /* _ASM_SW64_TERMIOS_H */ diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index 476c396c5aa4..dfefb9dc8651 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_SW64_VCPU_H -#define _ASM_SW64_VCPU_H +#define _ASM_SW64_VCPU_H #ifndef __ASSEMBLY__ @@ -43,5 +43,5 @@ struct vcpucb { unsigned long vcpu_irq_vector; }; -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_SW64_VCPU_H */ +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_SW64_VCPU_H */ diff --git a/arch/sw_64/include/uapi/asm/errno.h b/arch/sw_64/include/uapi/asm/errno.h index 0d8438f6bd40..94e1eab5d003 100644 --- a/arch/sw_64/include/uapi/asm/errno.h +++ b/arch/sw_64/include/uapi/asm/errno.h @@ -4,124 +4,124 @@ #include -#undef EAGAIN /* 11 in errno-base.h */ - -#define EDEADLK 11 /* Resource deadlock would occur */ - -#define EAGAIN 35 /* Try again */ -#define EWOULDBLOCK EAGAIN /* Operation would block */ -#define EINPROGRESS 36 /* Operation now in progress */ -#define EALREADY 37 /* Operation already in progress */ -#define ENOTSOCK 38 /* Socket operation on non-socket */ -#define EDESTADDRREQ 39 /* Destination address required */ -#define EMSGSIZE 40 /* Message too long */ -#define EPROTOTYPE 41 /* Protocol wrong type for socket */ -#define ENOPROTOOPT 42 /* Protocol not available */ -#define EPROTONOSUPPORT 43 /* Protocol not supported */ -#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ -#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ 
-#define EPFNOSUPPORT 46 /* Protocol family not supported */ -#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ -#define EADDRINUSE 48 /* Address already in use */ -#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ -#define ENETDOWN 50 /* Network is down */ -#define ENETUNREACH 51 /* Network is unreachable */ -#define ENETRESET 52 /* Network dropped connection because of reset */ -#define ECONNABORTED 53 /* Software caused connection abort */ -#define ECONNRESET 54 /* Connection reset by peer */ -#define ENOBUFS 55 /* No buffer space available */ -#define EISCONN 56 /* Transport endpoint is already connected */ -#define ENOTCONN 57 /* Transport endpoint is not connected */ -#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ -#define ETOOMANYREFS 59 /* Too many references: cannot splice */ -#define ETIMEDOUT 60 /* Connection timed out */ -#define ECONNREFUSED 61 /* Connection refused */ -#define ELOOP 62 /* Too many symbolic links encountered */ -#define ENAMETOOLONG 63 /* File name too long */ -#define EHOSTDOWN 64 /* Host is down */ -#define EHOSTUNREACH 65 /* No route to host */ -#define ENOTEMPTY 66 /* Directory not empty */ - -#define EUSERS 68 /* Too many users */ -#define EDQUOT 69 /* Quota exceeded */ -#define ESTALE 70 /* Stale NFS file handle */ -#define EREMOTE 71 /* Object is remote */ - -#define ENOLCK 77 /* No record locks available */ -#define ENOSYS 78 /* Function not implemented */ - -#define ENOMSG 80 /* No message of desired type */ -#define EIDRM 81 /* Identifier removed */ -#define ENOSR 82 /* Out of streams resources */ -#define ETIME 83 /* Timer expired */ -#define EBADMSG 84 /* Not a data message */ -#define EPROTO 85 /* Protocol error */ -#define ENODATA 86 /* No data available */ -#define ENOSTR 87 /* Device not a stream */ - -#define ENOPKG 92 /* Package not installed */ - -#define EILSEQ 116 /* Illegal byte sequence */ +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* 
Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define 
ESTALE 70 /* Stale NFS file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ /* The following are just random noise.. */ -#define ECHRNG 88 /* Channel number out of range */ -#define EL2NSYNC 89 /* Level 2 not synchronized */ -#define EL3HLT 90 /* Level 3 halted */ -#define EL3RST 91 /* Level 3 reset */ - -#define ELNRNG 93 /* Link number out of range */ -#define EUNATCH 94 /* Protocol driver not attached */ -#define ENOCSI 95 /* No CSI structure available */ -#define EL2HLT 96 /* Level 2 halted */ -#define EBADE 97 /* Invalid exchange */ -#define EBADR 98 /* Invalid request descriptor */ -#define EXFULL 99 /* Exchange full */ -#define ENOANO 100 /* No anode */ -#define EBADRQC 101 /* Invalid request code */ -#define EBADSLT 102 /* Invalid slot */ - -#define EDEADLOCK EDEADLK - -#define EBFONT 104 /* Bad font file format */ -#define ENONET 105 /* Machine is not on the network */ -#define ENOLINK 106 /* Link has been severed */ -#define EADV 107 /* Advertise error */ -#define ESRMNT 108 /* Srmount error */ -#define ECOMM 109 /* Communication error on send */ -#define EMULTIHOP 110 /* Multihop attempted */ -#define EDOTDOT 111 /* RFS specific error */ -#define EOVERFLOW 112 /* Value too large for defined data type */ -#define ENOTUNIQ 113 /* Name not unique on network */ -#define EBADFD 114 /* File descriptor in bad state */ -#define EREMCHG 115 /* Remote address changed */ - -#define EUCLEAN 117 /* Structure 
needs cleaning */ -#define ENOTNAM 118 /* Not a XENIX named type file */ -#define ENAVAIL 119 /* No XENIX semaphores available */ -#define EISNAM 120 /* Is a named type file */ -#define EREMOTEIO 121 /* Remote I/O error */ - -#define ELIBACC 122 /* Can not access a needed shared library */ -#define ELIBBAD 123 /* Accessing a corrupted shared library */ -#define ELIBSCN 124 /* .lib section in a.out corrupted */ -#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ -#define ELIBEXEC 126 /* Cannot exec a shared library directly */ -#define ERESTART 127 /* Interrupted system call should be restarted */ -#define ESTRPIPE 128 /* Streams pipe error */ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning 
*/ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ #define ENOMEDIUM 129 /* No medium found */ #define EMEDIUMTYPE 130 /* Wrong medium type */ -#define ECANCELED 131 /* Operation Cancelled */ -#define ENOKEY 132 /* Required key not available */ -#define EKEYEXPIRED 133 /* Key has expired */ -#define EKEYREVOKED 134 /* Key has been revoked */ -#define EKEYREJECTED 135 /* Key was rejected by service */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ /* for robust mutexes */ -#define EOWNERDEAD 136 /* Owner died */ -#define ENOTRECOVERABLE 137 /* State not recoverable */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ -#define ERFKILL 138 /* Operation not possible due to RF-kill */ +#define ERFKILL 138 /* Operation not possible due to RF-kill */ #define EHWPOISON 139 /* Memory page has hardware error */ diff --git a/arch/sw_64/include/uapi/asm/ioctls.h b/arch/sw_64/include/uapi/asm/ioctls.h index db8e456290e6..751a07fd0303 100644 --- a/arch/sw_64/include/uapi/asm/ioctls.h +++ b/arch/sw_64/include/uapi/asm/ioctls.h @@ -34,8 +34,8 @@ #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, 
struct winsize) -#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ -#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ #define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ #define TIOCGLTC _IOR('t', 116, struct ltchars) diff --git a/arch/sw_64/include/uapi/asm/kvm.h b/arch/sw_64/include/uapi/asm/kvm.h index 126c2a1d7411..254d6cbf1eb1 100644 --- a/arch/sw_64/include/uapi/asm/kvm.h +++ b/arch/sw_64/include/uapi/asm/kvm.h @@ -114,4 +114,4 @@ struct kvm_sync_regs { struct kvm_sregs { }; -#endif /* _UAPI_ASM_SW64_KVM_H */ +#endif /* _UAPI_ASM_SW64_KVM_H */ diff --git a/arch/sw_64/include/uapi/asm/regdef.h b/arch/sw_64/include/uapi/asm/regdef.h index ad4475b79435..7460a987c726 100644 --- a/arch/sw_64/include/uapi/asm/regdef.h +++ b/arch/sw_64/include/uapi/asm/regdef.h @@ -13,14 +13,14 @@ #define t6 $7 #define t7 $8 -#define s0 $9 /* saved-registers (callee-saved registers) */ -#define s1 $10 -#define s2 $11 -#define s3 $12 -#define s4 $13 -#define s5 $14 -#define s6 $15 -#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ +#define s0 $9 /* saved-registers (callee-saved registers) */ +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ #define a0 $16 /* argument registers (caller-saved) */ #define a1 $17 diff --git a/arch/sw_64/include/uapi/asm/termbits.h b/arch/sw_64/include/uapi/asm/termbits.h index 83de6ff63234..aaadb1d54f92 100644 --- a/arch/sw_64/include/uapi/asm/termbits.h +++ b/arch/sw_64/include/uapi/asm/termbits.h @@ -87,73 +87,73 @@ struct ktermios { #define OFILL 00000100 #define OFDEL 00000200 #define NLDLY 00001400 -#define NL0 00000000 -#define NL1 00000400 -#define NL2 00001000 -#define NL3 00001400 +#define NL0 00000000 +#define NL1 00000400 +#define NL2 00001000 +#define NL3 00001400 #define 
TABDLY 00006000 -#define TAB0 00000000 -#define TAB1 00002000 -#define TAB2 00004000 -#define TAB3 00006000 -#define CRDLY 00030000 -#define CR0 00000000 -#define CR1 00010000 -#define CR2 00020000 -#define CR3 00030000 +#define TAB0 00000000 +#define TAB1 00002000 +#define TAB2 00004000 +#define TAB3 00006000 +#define CRDLY 00030000 +#define CR0 00000000 +#define CR1 00010000 +#define CR2 00020000 +#define CR3 00030000 #define FFDLY 00040000 -#define FF0 00000000 -#define FF1 00040000 +#define FF0 00000000 +#define FF1 00040000 #define BSDLY 00100000 -#define BS0 00000000 -#define BS1 00100000 +#define BS0 00000000 +#define BS1 00100000 #define VTDLY 00200000 -#define VT0 00000000 -#define VT1 00200000 +#define VT0 00000000 +#define VT1 00200000 #define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */ /* c_cflag bit meaning */ #define CBAUD 0000037 -#define B0 0000000 /* hang up */ -#define B50 0000001 -#define B75 0000002 -#define B110 0000003 -#define B134 0000004 -#define B150 0000005 -#define B200 0000006 -#define B300 0000007 -#define B600 0000010 -#define B1200 0000011 -#define B1800 0000012 -#define B2400 0000013 -#define B4800 0000014 -#define B9600 0000015 -#define B19200 0000016 -#define B38400 0000017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 #define EXTA B19200 #define EXTB B38400 #define CBAUDEX 0000000 -#define B57600 00020 -#define B115200 00021 -#define B230400 00022 -#define B460800 00023 -#define B500000 00024 -#define B576000 00025 -#define B921600 00026 -#define B1000000 00027 -#define B1152000 00030 -#define B1500000 00031 -#define B2000000 00032 -#define B2500000 00033 -#define B3000000 00034 
-#define B3500000 00035 -#define B4000000 00036 +#define B57600 00020 +#define B115200 00021 +#define B230400 00022 +#define B460800 00023 +#define B500000 00024 +#define B576000 00025 +#define B921600 00026 +#define B1000000 00027 +#define B1152000 00030 +#define B1500000 00031 +#define B2000000 00032 +#define B2500000 00033 +#define B3000000 00034 +#define B3500000 00035 +#define B4000000 00036 #define CSIZE 00001400 -#define CS5 00000000 -#define CS6 00000400 -#define CS7 00001000 -#define CS8 00001400 +#define CS5 00000000 +#define CS6 00000400 +#define CS7 00001000 +#define CS8 00001400 #define CSTOPB 00002000 #define CREAD 00004000 @@ -184,19 +184,19 @@ struct ktermios { #define EXTPROC 0x10000000 /* Values for the ACTION argument to `tcflow'. */ -#define TCOOFF 0 -#define TCOON 1 -#define TCIOFF 2 -#define TCION 3 +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 /* Values for the QUEUE_SELECTOR argument to `tcflush'. */ -#define TCIFLUSH 0 -#define TCOFLUSH 1 -#define TCIOFLUSH 2 +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 /* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. 
*/ -#define TCSANOW 0 -#define TCSADRAIN 1 -#define TCSAFLUSH 2 +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 #endif /* _UAPI_ASM_SW64_TERMBITS_H */ diff --git a/arch/sw_64/kernel/acpi.c b/arch/sw_64/kernel/acpi.c index a0b5c4a57a07..61f2948f1781 100644 --- a/arch/sw_64/kernel/acpi.c +++ b/arch/sw_64/kernel/acpi.c @@ -97,7 +97,7 @@ int acpi_unmap_lsapic(int cpu) return 0; } EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ u8 acpi_checksum(u8 *table, u32 length) { @@ -361,7 +361,7 @@ int acpi_unmap_cpu(int cpu) return 0; } EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ void __init acpi_boot_table_init(void) diff --git a/arch/sw_64/kernel/pci_impl.h b/arch/sw_64/kernel/pci_impl.h index 6025145cb1c5..41d83a41db56 100644 --- a/arch/sw_64/kernel/pci_impl.h +++ b/arch/sw_64/kernel/pci_impl.h @@ -4,7 +4,7 @@ * with the PCI initialization routines. */ #ifndef _SW64_KERNEL_PCI_IMPL_H -#define _SW64_KERNEL_PCI_IMPL_H +#define _SW64_KERNEL_PCI_IMPL_H #include diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 2d2a8c6d4b4e..f68d93b5a7b7 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -785,7 +785,7 @@ setup_arch(char **cmdline_p) strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); } -#endif /* CMDLINE_EXTEND */ +#endif /* CMDLINE_EXTEND */ #endif if (IS_ENABLED(CONFIG_SW64_CHIP3_ASIC_DEBUG) && IS_ENABLED(CONFIG_SW64_CHIP3)) { diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index 3aa55c886e38..32690e78849b 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -60,7 +60,7 @@ void arch_irq_work_raise(void) set_irq_work_pending_flag(); } -#else /* CONFIG_IRQ_WORK */ +#else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() diff --git a/arch/sw_64/math-emu/math.c 
b/arch/sw_64/math-emu/math.c index 9f281d82ad83..6da3aadcff88 100644 --- a/arch/sw_64/math-emu/math.c +++ b/arch/sw_64/math-emu/math.c @@ -188,14 +188,14 @@ void write_fp_reg_s(unsigned long reg, unsigned long val_p0, unsigned long p1, unsigned long p2, unsigned long p3); void write_fp_reg_d(unsigned long reg, unsigned long val_p0, unsigned long p1, unsigned long p2, unsigned long p3); -#define LOW_64_WORKING 1 +#define LOW_64_WORKING 1 #define HIGH_64_WORKING 2 /* * End for sw64 */ -#define OPC_HMC 0x00 +#define OPC_HMC 0x00 #define OPC_INTA 0x10 #define OPC_INTL 0x11 #define OPC_INTS 0x12 @@ -205,7 +205,7 @@ void write_fp_reg_d(unsigned long reg, unsigned long val_p0, #define OPC_FLTI 0x16 #define OPC_FLTL 0x17 #define OPC_MISC 0x18 -#define OPC_JSR 0x1a +#define OPC_JSR 0x1a #define FOP_SRC_S 0 #define FOP_SRC_T 2 @@ -295,9 +295,9 @@ void cleanup_module(void) sw64_fp_emul = save_emul; } -#undef sw64_fp_emul_imprecise +#undef sw64_fp_emul_imprecise #define sw64_fp_emul_imprecise do_sw_fp_emul_imprecise -#undef sw64_fp_emul +#undef sw64_fp_emul #define sw64_fp_emul do_sw_fp_emul #endif /* MODULE */ -- Gitee From 7e3cd3557bdcd5f4038c23fc8fd1e0e2bdfc5ab3 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Wed, 7 Sep 2022 15:20:01 +0800 Subject: [PATCH 57/77] sw64: unify header guard naming Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Use the canonical header guard naming of the full path to the header. 
Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/asm-offsets.h | 2 +- arch/sw_64/include/asm/asm-prototypes.h | 2 +- arch/sw_64/include/asm/bugs.h | 2 +- arch/sw_64/include/asm/cache.h | 2 +- arch/sw_64/include/asm/checksum.h | 2 +- arch/sw_64/include/asm/chip3_io.h | 2 +- arch/sw_64/include/asm/cmpxchg.h | 1 + arch/sw_64/include/asm/core.h | 2 +- arch/sw_64/include/asm/delay.h | 2 +- arch/sw_64/include/asm/device.h | 2 +- arch/sw_64/include/asm/dmi.h | 2 +- arch/sw_64/include/asm/early_ioremap.h | 2 +- arch/sw_64/include/asm/extable.h | 2 +- arch/sw_64/include/asm/hmcall.h | 6 +++--- arch/sw_64/include/asm/hw_init.h | 2 +- arch/sw_64/include/asm/hw_irq.h | 2 +- arch/sw_64/include/asm/irq_impl.h | 2 +- arch/sw_64/include/asm/jump_label.h | 6 +++--- arch/sw_64/include/asm/kvm_cma.h | 6 +++--- arch/sw_64/include/asm/kvm_emulate.h | 2 +- arch/sw_64/include/asm/kvm_para.h | 2 +- arch/sw_64/include/asm/kvm_timer.h | 2 +- arch/sw_64/include/asm/linkage.h | 2 +- arch/sw_64/include/asm/mmu.h | 2 +- arch/sw_64/include/asm/ptrace.h | 2 +- arch/sw_64/include/asm/setup.h | 2 +- arch/sw_64/include/asm/sfp-machine.h | 2 +- arch/sw_64/include/asm/signal.h | 2 +- arch/sw_64/include/asm/smp.h | 2 +- arch/sw_64/include/asm/spinlock_types.h | 2 +- arch/sw_64/include/asm/suspend.h | 6 +++--- arch/sw_64/include/asm/sw64_init.h | 6 +++--- arch/sw_64/include/asm/sw64io.h | 2 +- arch/sw_64/include/asm/tc.h | 2 +- arch/sw_64/include/asm/timex.h | 2 +- arch/sw_64/include/asm/tlb.h | 2 +- arch/sw_64/include/asm/wrperfmon.h | 2 +- arch/sw_64/include/asm/xchg.h | 9 ++++++--- arch/sw_64/include/asm/xor.h | 2 +- arch/sw_64/include/uapi/asm/errno.h | 2 +- arch/sw_64/include/uapi/asm/fcntl.h | 2 +- arch/sw_64/include/uapi/asm/hmcall.h | 2 +- arch/sw_64/include/uapi/asm/perf_regs.h | 6 +++--- arch/sw_64/include/uapi/asm/setup.h | 2 +- arch/sw_64/include/uapi/asm/sigcontext.h | 2 +- arch/sw_64/include/uapi/asm/siginfo.h | 2 +- 
arch/sw_64/include/uapi/asm/stat.h | 2 +- arch/sw_64/kernel/pci_impl.h | 2 +- arch/sw_64/kernel/proto.h | 2 +- arch/sw_64/kvm/irq.h | 6 +++--- arch/sw_64/math-emu/sfp-util.h | 5 +++++ arch/sw_64/net/bpf_jit.h | 6 +++--- arch/sw_64/oprofile/op_impl.h | 2 +- arch/sw_64/tools/relocs.h | 6 +++--- 54 files changed, 81 insertions(+), 72 deletions(-) diff --git a/arch/sw_64/include/asm/asm-offsets.h b/arch/sw_64/include/asm/asm-offsets.h index 72cd408a9c6f..5ddfd96ccb79 100644 --- a/arch/sw_64/include/asm/asm-offsets.h +++ b/arch/sw_64/include/asm/asm-offsets.h @@ -4,4 +4,4 @@ #include -#endif +#endif /* _ASM_SW64_ASM_OFFSETS_H */ diff --git a/arch/sw_64/include/asm/asm-prototypes.h b/arch/sw_64/include/asm/asm-prototypes.h index 15bad8ef6883..67746d6bffb7 100644 --- a/arch/sw_64/include/asm/asm-prototypes.h +++ b/arch/sw_64/include/asm/asm-prototypes.h @@ -19,4 +19,4 @@ extern void __remlu(void); extern void __divwu(void); extern void __remwu(void); -#endif +#endif /* _ASM_SW64_ASM_PROTOTYPES_H */ diff --git a/arch/sw_64/include/asm/bugs.h b/arch/sw_64/include/asm/bugs.h index c4a336fe04a2..1cd94ed171fb 100644 --- a/arch/sw_64/include/asm/bugs.h +++ b/arch/sw_64/include/asm/bugs.h @@ -6,4 +6,4 @@ static void check_bugs(void) { } -#endif +#endif /* _ASM_SW64_BUGS_H */ diff --git a/arch/sw_64/include/asm/cache.h b/arch/sw_64/include/asm/cache.h index 1dca2e2e04a4..fade2e095b8b 100644 --- a/arch/sw_64/include/asm/cache.h +++ b/arch/sw_64/include/asm/cache.h @@ -8,4 +8,4 @@ #define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#endif +#endif /* _ASM_SW64_CACHE_H */ diff --git a/arch/sw_64/include/asm/checksum.h b/arch/sw_64/include/asm/checksum.h index 284c1678f51e..7f3768290402 100644 --- a/arch/sw_64/include/asm/checksum.h +++ b/arch/sw_64/include/asm/checksum.h @@ -123,4 +123,4 @@ static inline unsigned short from64to16(unsigned long x) return out_v.us[0] + out_v.us[1]; } -#endif +#endif /* _ASM_SW64_CHECKSUM_H */ diff --git 
a/arch/sw_64/include/asm/chip3_io.h b/arch/sw_64/include/asm/chip3_io.h index 14d02c080607..3bfbc2bdafe7 100644 --- a/arch/sw_64/include/asm/chip3_io.h +++ b/arch/sw_64/include/asm/chip3_io.h @@ -314,4 +314,4 @@ enum { GPIO_SWPORTA_DDR = GPIO_BASE | 0x200UL, }; /*--------------------------------------------------------------------------*/ -#endif +#endif /* _ASM_SW64_CHIP3_IO_H */ diff --git a/arch/sw_64/include/asm/cmpxchg.h b/arch/sw_64/include/asm/cmpxchg.h index e07abc47c7dd..7f2d103db9c2 100644 --- a/arch/sw_64/include/asm/cmpxchg.h +++ b/arch/sw_64/include/asm/cmpxchg.h @@ -39,6 +39,7 @@ #endif #undef ____xchg #undef ____cmpxchg +#undef _ASM_SW64_XCHG_H #define ____xchg(type, args...) __xchg ##type(args) #define ____cmpxchg(type, args...) __cmpxchg ##type(args) #include diff --git a/arch/sw_64/include/asm/core.h b/arch/sw_64/include/asm/core.h index 72d752c87412..e5e4cc138102 100644 --- a/arch/sw_64/include/asm/core.h +++ b/arch/sw_64/include/asm/core.h @@ -45,4 +45,4 @@ extern void entSys(void); extern void entUna(void); /* head.S */ extern void __smp_callin(unsigned long); -#endif +#endif /* _ASM_SW64_CORE_H */ diff --git a/arch/sw_64/include/asm/delay.h b/arch/sw_64/include/asm/delay.h index 45112c7c3c01..f4080753e954 100644 --- a/arch/sw_64/include/asm/delay.h +++ b/arch/sw_64/include/asm/delay.h @@ -8,4 +8,4 @@ extern void udelay(unsigned long usecs); extern void ndelay(unsigned long nsecs); #define ndelay ndelay -#endif /* defined(_ASM_SW64_DELAY_H) */ +#endif /* _ASM_SW64_DELAY_H */ diff --git a/arch/sw_64/include/asm/device.h b/arch/sw_64/include/asm/device.h index dadd756d6934..bc1408c47dd3 100644 --- a/arch/sw_64/include/asm/device.h +++ b/arch/sw_64/include/asm/device.h @@ -10,4 +10,4 @@ struct dev_archdata { struct pdev_archdata { }; -#endif +#endif /* _ASM_SW64_DEVICE_H */ diff --git a/arch/sw_64/include/asm/dmi.h b/arch/sw_64/include/asm/dmi.h index 5142aa66ea45..05e80c9a3a76 100644 --- a/arch/sw_64/include/asm/dmi.h +++ 
b/arch/sw_64/include/asm/dmi.h @@ -27,4 +27,4 @@ #define dmi_unmap(x) early_iounmap(x, 0) #define dmi_alloc(l) kzalloc(l, GFP_KERNEL) -#endif +#endif /* _ASM_SW64_DMI_H */ diff --git a/arch/sw_64/include/asm/early_ioremap.h b/arch/sw_64/include/asm/early_ioremap.h index 930c6bf36ad3..5459cba8a677 100644 --- a/arch/sw_64/include/asm/early_ioremap.h +++ b/arch/sw_64/include/asm/early_ioremap.h @@ -27,4 +27,4 @@ static inline void early_iounmap(volatile void __iomem *addr, unsigned long size } #define early_memunmap(addr, size) early_iounmap(addr, size) -#endif +#endif /* _ASM_SW64_EARLY_IOREMAP_H */ diff --git a/arch/sw_64/include/asm/extable.h b/arch/sw_64/include/asm/extable.h index ae753772a45a..3680b4a918a6 100644 --- a/arch/sw_64/include/asm/extable.h +++ b/arch/sw_64/include/asm/extable.h @@ -56,4 +56,4 @@ struct exception_table_entry { extern short regoffsets[]; #define map_regs(r) (*(unsigned long *)((char *)regs + regoffsets[r])) -#endif +#endif /* _ASM_SW64_EXTABLE_H */ diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index e85397ab06a1..04fcafac9e80 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_HMC_H -#define _ASM_SW64_HMC_H +#ifndef _ASM_SW64_HMCALL_H +#define _ASM_SW64_HMCALL_H /* * Common HMC-code @@ -216,4 +216,4 @@ __CALL_HMC_W1(wrtp, unsigned long); #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* _ASM_SW64_HMC_H */ +#endif /* _ASM_SW64_HMCALL_H */ diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index 8a28aac2e54f..81dd2581e0da 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -162,4 +162,4 @@ DECLARE_STATIC_KEY_FALSE(run_mode_emul_key); (((val) & CACHE_INDEX_BITS_MASK) >> CACHE_INDEX_BITS_SHIFT) #define current_cpu_data cpu_data[smp_processor_id()] -#endif /* HW_INIT_H */ +#endif /* _ASM_SW64_HW_INIT_H */ diff --git 
a/arch/sw_64/include/asm/hw_irq.h b/arch/sw_64/include/asm/hw_irq.h index f6fd1d802abd..ad5aed26efb7 100644 --- a/arch/sw_64/include/asm/hw_irq.h +++ b/arch/sw_64/include/asm/hw_irq.h @@ -13,4 +13,4 @@ DECLARE_PER_CPU(unsigned long, irq_pmi_count); typedef unsigned int vector_irq_t[PERCPU_MSI_IRQS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); #endif -#endif +#endif /* _ASM_SW64_HW_IRQ_H */ diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h index 48dbc486a126..797af433a126 100644 --- a/arch/sw_64/include/asm/irq_impl.h +++ b/arch/sw_64/include/asm/irq_impl.h @@ -45,4 +45,4 @@ extern void handle_ipi(struct pt_regs *regs); extern void __init sw64_init_irq(void); extern irqreturn_t timer_interrupt(int irq, void *dev); -#endif +#endif /* _ASM_SW64_IRQ_IMPL_H */ diff --git a/arch/sw_64/include/asm/jump_label.h b/arch/sw_64/include/asm/jump_label.h index 78d3fb6246f0..32fbf7573b20 100644 --- a/arch/sw_64/include/asm/jump_label.h +++ b/arch/sw_64/include/asm/jump_label.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SW64_JUMP_LABEL_H -#define __ASM_SW64_JUMP_LABEL_H +#ifndef _ASM_SW64_JUMP_LABEL_H +#define _ASM_SW64_JUMP_LABEL_H #ifndef __ASSEMBLY__ @@ -47,4 +47,4 @@ struct jump_entry { }; #endif /* __ASSEMBLY__ */ -#endif /* __ASM_SW64_JUMP_LABEL_H */ +#endif /* _ASM_SW64_JUMP_LABEL_H */ diff --git a/arch/sw_64/include/asm/kvm_cma.h b/arch/sw_64/include/asm/kvm_cma.h index 192bca436380..d50ba599ceb7 100644 --- a/arch/sw_64/include/asm/kvm_cma.h +++ b/arch/sw_64/include/asm/kvm_cma.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_KVM_CMA_H__ -#define _ASM_SW64_KVM_CMA_H__ +#ifndef _ASM_SW64_KVM_CMA_H +#define _ASM_SW64_KVM_CMA_H #include @@ -8,4 +8,4 @@ extern int __init kvm_cma_declare_contiguous(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, const char *name, struct cma **res_cma); -#endif +#endif /* _ASM_SW64_KVM_CMA_H */ diff 
--git a/arch/sw_64/include/asm/kvm_emulate.h b/arch/sw_64/include/asm/kvm_emulate.h index d842008f189a..915aa6c0bce2 100644 --- a/arch/sw_64/include/asm/kvm_emulate.h +++ b/arch/sw_64/include/asm/kvm_emulate.h @@ -43,4 +43,4 @@ unsigned int interrupt_pending(struct kvm_vcpu *vcpu, bool *more); void clear_vcpu_irq(struct kvm_vcpu *vcpu); void inject_vcpu_irq(struct kvm_vcpu *vcpu, unsigned int irq); void try_deliver_interrupt(struct kvm_vcpu *vcpu, unsigned int irq, bool more); -#endif +#endif /* _ASM_SW64_KVM_EMULATE_H */ diff --git a/arch/sw_64/include/asm/kvm_para.h b/arch/sw_64/include/asm/kvm_para.h index ba78c5371570..442f1c7d9f83 100644 --- a/arch/sw_64/include/asm/kvm_para.h +++ b/arch/sw_64/include/asm/kvm_para.h @@ -23,4 +23,4 @@ static inline unsigned long kvm_hypercall3(unsigned long num, : "$1", "$22", "$23", "$24", "$25"); return __r0; } -#endif +#endif /* _ASM_SW64_KVM_PARA_H */ diff --git a/arch/sw_64/include/asm/kvm_timer.h b/arch/sw_64/include/asm/kvm_timer.h index be50bba9c4c6..8080873c684f 100644 --- a/arch/sw_64/include/asm/kvm_timer.h +++ b/arch/sw_64/include/asm/kvm_timer.h @@ -6,4 +6,4 @@ void set_timer(struct kvm_vcpu *vcpu, unsigned long delta); void set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); enum hrtimer_restart clockdev_fn(struct hrtimer *timer); -#endif +#endif /* _ASM_SW64_KVM_TIMER_H */ diff --git a/arch/sw_64/include/asm/linkage.h b/arch/sw_64/include/asm/linkage.h index 96c83663d9e8..6576fb46a000 100644 --- a/arch/sw_64/include/asm/linkage.h +++ b/arch/sw_64/include/asm/linkage.h @@ -6,4 +6,4 @@ #define SYSCALL_ALIAS(alias, name) \ asm(#alias " = " #name "\n\t.globl " #alias) -#endif +#endif /* _ASM_SW64_LINKAGE_H */ diff --git a/arch/sw_64/include/asm/mmu.h b/arch/sw_64/include/asm/mmu.h index 548c73b318cb..f24219fac654 100644 --- a/arch/sw_64/include/asm/mmu.h +++ b/arch/sw_64/include/asm/mmu.h @@ -7,4 +7,4 @@ typedef struct { unsigned long asid[NR_CPUS]; void *vdso; } mm_context_t; -#endif +#endif /* _ASM_SW64_MMU_H 
*/ diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index b5afebf82939..5f6cd305f95e 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -92,4 +92,4 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->r0; } -#endif +#endif /* _ASM_SW64_PTRACE_H */ diff --git a/arch/sw_64/include/asm/setup.h b/arch/sw_64/include/asm/setup.h index c0fb4e8bd80c..384eeba02144 100644 --- a/arch/sw_64/include/asm/setup.h +++ b/arch/sw_64/include/asm/setup.h @@ -43,4 +43,4 @@ extern struct boot_params *sunway_boot_params; #endif -#endif +#endif /* _ASM_SW64_SETUP_H */ diff --git a/arch/sw_64/include/asm/sfp-machine.h b/arch/sw_64/include/asm/sfp-machine.h index 9b3e8688feee..c1b914898543 100644 --- a/arch/sw_64/include/asm/sfp-machine.h +++ b/arch/sw_64/include/asm/sfp-machine.h @@ -66,4 +66,4 @@ do { \ /* We write the results always */ #define FP_INHIBIT_RESULTS 0 -#endif +#endif /* _ASM_SW64_SFP_MACHINE_H */ diff --git a/arch/sw_64/include/asm/signal.h b/arch/sw_64/include/asm/signal.h index 0d846c1aa571..9e0936e6db2b 100644 --- a/arch/sw_64/include/asm/signal.h +++ b/arch/sw_64/include/asm/signal.h @@ -21,4 +21,4 @@ struct odd_sigaction { }; #include -#endif +#endif /* _ASM_SW64_SIGNAL_H */ diff --git a/arch/sw_64/include/asm/smp.h b/arch/sw_64/include/asm/smp.h index e7aa742f73f0..0573361dc840 100644 --- a/arch/sw_64/include/asm/smp.h +++ b/arch/sw_64/include/asm/smp.h @@ -178,4 +178,4 @@ static inline void send_ipi(int cpu, unsigned long type) #define reset_cpu(cpu) send_ipi((cpu), II_RESET) -#endif +#endif /* _ASM_SW64_SMP_H */ diff --git a/arch/sw_64/include/asm/spinlock_types.h b/arch/sw_64/include/asm/spinlock_types.h index 28f2183ced74..62e554e4f48c 100644 --- a/arch/sw_64/include/asm/spinlock_types.h +++ b/arch/sw_64/include/asm/spinlock_types.h @@ -5,4 +5,4 @@ #include #include -#endif +#endif /* _ASM_SW64_SPINLOCK_TYPES_H */ diff --git a/arch/sw_64/include/asm/suspend.h 
b/arch/sw_64/include/asm/suspend.h index de6d97a0aff6..521ab099f94b 100644 --- a/arch/sw_64/include/asm/suspend.h +++ b/arch/sw_64/include/asm/suspend.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SLEEP_H -#define _ASM_SW64_SLEEP_H +#ifndef _ASM_SW64_SUSPEND_H +#define _ASM_SW64_SUSPEND_H #include #include @@ -45,4 +45,4 @@ struct processor_state { }; extern void sw64_suspend_deep_sleep(struct processor_state *state); -#endif /* _ASM_SW64_SLEEP_H */ +#endif /* _ASM_SW64_SUSPEND_H */ diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h index 9f16bdf2a61a..aae82f4163e0 100644 --- a/arch/sw_64/include/asm/sw64_init.h +++ b/arch/sw_64/include/asm/sw64_init.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_INIT_H -#define _ASM_SW64_INIT_H +#ifndef _ASM_SW64_SW64_INIT_H +#define _ASM_SW64_SW64_INIT_H #include #include @@ -43,4 +43,4 @@ extern struct sw64_chip_init_ops *sw64_chip_init; DECLARE_PER_CPU(unsigned long, hard_node_id); -#endif /* _ASM_SW64_INIT_H */ +#endif /* _ASM_SW64_SW64_INIT_H */ diff --git a/arch/sw_64/include/asm/sw64io.h b/arch/sw_64/include/asm/sw64io.h index 7d79a5b75090..0892356b8e6b 100644 --- a/arch/sw_64/include/asm/sw64io.h +++ b/arch/sw_64/include/asm/sw64io.h @@ -95,4 +95,4 @@ sw64_io_write(unsigned long node, unsigned long reg, unsigned long data) addr = __va(SW64_IO_BASE(node) | reg); writeq(data, addr); } -#endif +#endif /* _ASM_SW64_SW64IO_H */ diff --git a/arch/sw_64/include/asm/tc.h b/arch/sw_64/include/asm/tc.h index f995a2a75f85..aa39c3528e3f 100644 --- a/arch/sw_64/include/asm/tc.h +++ b/arch/sw_64/include/asm/tc.h @@ -13,4 +13,4 @@ static inline unsigned long rdtc(void) extern void tc_sync_clear(void); extern void tc_sync_ready(void *ignored); extern void tc_sync_set(void); -#endif +#endif /* _ASM_SW64_TC_H */ diff --git a/arch/sw_64/include/asm/timex.h b/arch/sw_64/include/asm/timex.h index 9065e39a0466..235197b0d1fd 100644 --- 
a/arch/sw_64/include/asm/timex.h +++ b/arch/sw_64/include/asm/timex.h @@ -21,4 +21,4 @@ static inline cycles_t get_cycles(void) return rdtc(); } -#endif +#endif /* _ASM_SW64_TIMEX_H */ diff --git a/arch/sw_64/include/asm/tlb.h b/arch/sw_64/include/asm/tlb.h index 4902624dba88..67ce55fc4c43 100644 --- a/arch/sw_64/include/asm/tlb.h +++ b/arch/sw_64/include/asm/tlb.h @@ -15,4 +15,4 @@ #define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) -#endif +#endif /* _ASM_SW64_TLB_H */ diff --git a/arch/sw_64/include/asm/wrperfmon.h b/arch/sw_64/include/asm/wrperfmon.h index 15f7f6beb07c..c06a05121a68 100644 --- a/arch/sw_64/include/asm/wrperfmon.h +++ b/arch/sw_64/include/asm/wrperfmon.h @@ -61,4 +61,4 @@ #define MAX_HWEVENTS 2 #define PMC_COUNT_MASK ((1UL << 58) - 1) -#endif +#endif /* _ASM_SW64_WRPERFMON_H */ diff --git a/arch/sw_64/include/asm/xchg.h b/arch/sw_64/include/asm/xchg.h index bac67623da91..ba4e6d1a27ad 100644 --- a/arch/sw_64/include/asm/xchg.h +++ b/arch/sw_64/include/asm/xchg.h @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_XCHG_H +#define _ASM_SW64_XCHG_H + #ifndef _ASM_SW64_CMPXCHG_H -#error Do not include xchg.h directly! -#else +#error Do not include xchg.h directly. Use cmpxchg.h +#endif /* * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code * except that local version do not have the expensive memory barrier. 
@@ -325,4 +328,4 @@ static __always_inline unsigned long ____cmpxchg(, volatile void *ptr, return old; } -#endif +#endif /* _ASM_SW64_XCHG_H */ diff --git a/arch/sw_64/include/asm/xor.h b/arch/sw_64/include/asm/xor.h index af95259ed8ef..e9731f2a8f12 100644 --- a/arch/sw_64/include/asm/xor.h +++ b/arch/sw_64/include/asm/xor.h @@ -844,4 +844,4 @@ static struct xor_block_template xor_block_sw64_prefetch = { */ #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sw64_prefetch) -#endif +#endif /* _ASM_SW64_XOR_H */ diff --git a/arch/sw_64/include/uapi/asm/errno.h b/arch/sw_64/include/uapi/asm/errno.h index 94e1eab5d003..969ee99ee86c 100644 --- a/arch/sw_64/include/uapi/asm/errno.h +++ b/arch/sw_64/include/uapi/asm/errno.h @@ -125,4 +125,4 @@ #define EHWPOISON 139 /* Memory page has hardware error */ -#endif +#endif /* _UAPI_ASM_SW64_ERRNO_H */ diff --git a/arch/sw_64/include/uapi/asm/fcntl.h b/arch/sw_64/include/uapi/asm/fcntl.h index 99e1a31c5e86..be2daae2cc4d 100644 --- a/arch/sw_64/include/uapi/asm/fcntl.h +++ b/arch/sw_64/include/uapi/asm/fcntl.h @@ -55,4 +55,4 @@ #include -#endif +#endif /* _UAPI_ASM_SW64_FCNTL_H */ diff --git a/arch/sw_64/include/uapi/asm/hmcall.h b/arch/sw_64/include/uapi/asm/hmcall.h index dcff778e1616..6867fb7b4d24 100644 --- a/arch/sw_64/include/uapi/asm/hmcall.h +++ b/arch/sw_64/include/uapi/asm/hmcall.h @@ -14,4 +14,4 @@ #define HMC_gentrap 0xAA #define HMC_wrperfmon 0xB0 -#endif +#endif /* _UAPI_ASM_SW64_HMCALL_H */ diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h index 1378a7397951..febde5fd72fb 100644 --- a/arch/sw_64/include/uapi/asm/perf_regs.h +++ b/arch/sw_64/include/uapi/asm/perf_regs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_SW64_PERF_REGS_H -#define _ASM_SW64_PERF_REGS_H +#ifndef _UAPI_ASM_SW64_PERF_REGS_H +#define _UAPI_ASM_SW64_PERF_REGS_H enum perf_event_sw64_regs { PERF_REG_SW64_R0, @@ -42,4 +42,4 @@ enum perf_event_sw64_regs { 
PERF_REG_SW64_R18, PERF_REG_SW64_MAX, }; -#endif /* _ASM_SW64_PERF_REGS_H */ +#endif /* _UAPI_ASM_SW64_PERF_REGS_H */ diff --git a/arch/sw_64/include/uapi/asm/setup.h b/arch/sw_64/include/uapi/asm/setup.h index 10ce5dba9c30..e6cca4525049 100644 --- a/arch/sw_64/include/uapi/asm/setup.h +++ b/arch/sw_64/include/uapi/asm/setup.h @@ -4,4 +4,4 @@ #define COMMAND_LINE_SIZE 2048 -#endif +#endif /* _UAPI_ASM_SW64_SETUP_H */ diff --git a/arch/sw_64/include/uapi/asm/sigcontext.h b/arch/sw_64/include/uapi/asm/sigcontext.h index 11d7eece86ef..08a081470383 100644 --- a/arch/sw_64/include/uapi/asm/sigcontext.h +++ b/arch/sw_64/include/uapi/asm/sigcontext.h @@ -31,4 +31,4 @@ struct sigcontext { }; -#endif +#endif /* _UAPI_ASM_SW64_SIGCONTEXT_H */ diff --git a/arch/sw_64/include/uapi/asm/siginfo.h b/arch/sw_64/include/uapi/asm/siginfo.h index 4a58eea9b67c..28c656c28313 100644 --- a/arch/sw_64/include/uapi/asm/siginfo.h +++ b/arch/sw_64/include/uapi/asm/siginfo.h @@ -8,4 +8,4 @@ #include -#endif +#endif /* _UAPI_ASM_SW64_SIGINFO_H */ diff --git a/arch/sw_64/include/uapi/asm/stat.h b/arch/sw_64/include/uapi/asm/stat.h index d2b21128c569..25aad21f4d31 100644 --- a/arch/sw_64/include/uapi/asm/stat.h +++ b/arch/sw_64/include/uapi/asm/stat.h @@ -48,4 +48,4 @@ struct stat64 { long __unused[3]; }; -#endif +#endif /* _UAPI_ASM_SW64_STAT_H */ diff --git a/arch/sw_64/kernel/pci_impl.h b/arch/sw_64/kernel/pci_impl.h index 41d83a41db56..4262ba94f44b 100644 --- a/arch/sw_64/kernel/pci_impl.h +++ b/arch/sw_64/kernel/pci_impl.h @@ -25,4 +25,4 @@ extern const struct dma_map_ops sw64_dma_direct_ops; extern struct cma *sw64_kvm_cma; extern struct gen_pool *sw64_kvm_pool; -#endif +#endif /* _SW64_KERNEL_PCI_IMPL_H */ diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index 8c31eca3cc32..f908263f925a 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -19,4 +19,4 @@ extern void __init setup_sched_clock(void); extern void __init sw64_sched_clock_init(void); #endif 
-#endif /* _SW64_PROTO_H */ +#endif /* _SW64_KERNEL_PROTO_H */ diff --git a/arch/sw_64/kvm/irq.h b/arch/sw_64/kvm/irq.h index ee56d9b97632..9268ab6af492 100644 --- a/arch/sw_64/kvm/irq.h +++ b/arch/sw_64/kvm/irq.h @@ -3,10 +3,10 @@ * irq.h: in kernel interrupt controller related definitions */ -#ifndef __IRQ_H -#define __IRQ_H +#ifndef _SW64_KVM_IRQ_H +#define _SW64_KVM_IRQ_H static inline int irqchip_in_kernel(struct kvm *kvm) { return 1; } -#endif +#endif /* _SW64_KVM_IRQ_H */ diff --git a/arch/sw_64/math-emu/sfp-util.h b/arch/sw_64/math-emu/sfp-util.h index 63f9685999f3..0769c0223e0d 100644 --- a/arch/sw_64/math-emu/sfp-util.h +++ b/arch/sw_64/math-emu/sfp-util.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_MATH_EMU_SFP_UTIL_H +#define _SW64_MATH_EMU_SFP_UTIL_H + #include #include #include @@ -34,3 +37,5 @@ extern unsigned long __udiv_qrnnd(unsigned long *, unsigned long, #define __LITTLE_ENDIAN -1 #endif #define __BYTE_ORDER __LITTLE_ENDIAN + +#endif /* _SW64_MATH_EMU_SFP_UTIL_H */ diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h index 2cf5ba5253a8..929036d8ea6b 100644 --- a/arch/sw_64/net/bpf_jit.h +++ b/arch/sw_64/net/bpf_jit.h @@ -18,8 +18,8 @@ * along with this program. If not, see . 
*/ -#ifndef _SW64_BPF_JIT_H -#define _SW64_BPF_JIT_H +#ifndef _SW64_NET_BPF_JIT_H +#define _SW64_NET_BPF_JIT_H /* SW64 instruction field shift */ #define SW64_BPF_OPCODE_OFFSET 26 @@ -365,4 +365,4 @@ enum sw64_bpf_registers { sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE) -#endif /* _SW64_BPF_JIT_H */ +#endif /* _SW64_NET_BPF_JIT_H */ diff --git a/arch/sw_64/oprofile/op_impl.h b/arch/sw_64/oprofile/op_impl.h index 10bdd455c3dd..e8714cb7c2ea 100644 --- a/arch/sw_64/oprofile/op_impl.h +++ b/arch/sw_64/oprofile/op_impl.h @@ -53,4 +53,4 @@ struct op_axp_model { unsigned char can_set_proc_mode; }; -#endif +#endif /* _SW64_OPROFILE_OP_IMPL_H */ diff --git a/arch/sw_64/tools/relocs.h b/arch/sw_64/tools/relocs.h index 7273ccaed11f..37ac09ec2a77 100644 --- a/arch/sw_64/tools/relocs.h +++ b/arch/sw_64/tools/relocs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef RELOCS_H -#define RELOCS_H +#ifndef _SW64_TOOLS_RELOCS_H +#define _SW64_TOOLS_RELOCS_H #include #include @@ -68,4 +68,4 @@ enum symtype { void process(FILE *fp, int as_text, int as_bin, int show_reloc_info, int keep_relocs); -#endif /* RELOCS_H */ +#endif /* _SW64_TOOLS_RELOCS_H */ -- Gitee From dcde4fb60cb54debcc32cba1513e5d7f750e152e Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Thu, 4 Aug 2022 16:35:30 +0800 Subject: [PATCH 58/77] sw64: perf: fix PMI with no event Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTIW -------------------------------- For perf disable PMU, this then presents the following error condition in processes schedule: Process A Process B Disable irq Disable PMU Enable irq ->sw64_perf_event_irq_handler() When irq is disabled, PMC may still overflow then a PMI triggers. After another process is scheduled and irq is enabled, this PMI will raise immediately. To avoid this, clear interrupt flag in hmcode when it disable PMU. 
However, in kernel, the interrupt handler returns directly for events that do not exist. Signed-off-by: He Chuyue Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/perf_event.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index 6e344239917b..4122502da895 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -385,7 +385,6 @@ static int sw64_pmu_add(struct perf_event *event, int flags) int err = 0; unsigned long irq_flags; - perf_pmu_disable(event->pmu); local_irq_save(irq_flags); if (cpuc->pmcs[hwc->idx] == PMC_IN_USE) { @@ -408,7 +407,6 @@ static int sw64_pmu_add(struct perf_event *event, int flags) out: local_irq_restore(irq_flags); - perf_pmu_enable(event->pmu); return err; } @@ -422,24 +420,17 @@ static void sw64_pmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; unsigned long irq_flags; - perf_pmu_disable(event->pmu); local_irq_save(irq_flags); - if (cpuc->event[hwc->idx] != event) - goto out; - + sw64_pmu_stop(event, PERF_EF_UPDATE); cpuc->event[hwc->idx] = NULL; cpuc->pmcs[hwc->idx] = PMC_NOT_USE; cpuc->n_events--; - sw64_pmu_stop(event, PERF_EF_UPDATE); - /* Absorb the final count and turn off the event. */ perf_event_update_userpage(event); -out: local_irq_restore(irq_flags); - perf_pmu_enable(event->pmu); } /* @@ -478,6 +469,9 @@ static void sw64_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; if (!(hwc->state & PERF_HES_STOPPED)) { + wrperfmon(PERFMON_CMD_DISABLE, hwc->idx == 0 ? 
+ PERFMON_DISABLE_ARGS_PC0 : + PERFMON_DISABLE_ARGS_PC1); hwc->state |= PERF_HES_STOPPED; barrier(); } @@ -486,12 +480,6 @@ static void sw64_pmu_stop(struct perf_event *event, int flags) sw64_perf_event_update(event, hwc, hwc->idx, 0); hwc->state |= PERF_HES_UPTODATE; } - - if (hwc->idx == 0) - wrperfmon(PERFMON_CMD_DISABLE, PERFMON_DISABLE_ARGS_PC0); - else - wrperfmon(PERFMON_CMD_DISABLE, PERFMON_DISABLE_ARGS_PC1); - } /* @@ -659,10 +647,7 @@ static void sw64_perf_event_irq_handler(unsigned long perfmon_num, event = cpuc->event[idx]; if (unlikely(!event)) { - /* This should never occur! */ irq_err_count++; - pr_warn("PMI: No event at index %d!\n", idx); - wrperfmon(PERFMON_CMD_ENABLE, idx == 0 ? PERFMON_DISABLE_ARGS_PC0 : PERFMON_DISABLE_ARGS_PC1); return; } -- Gitee From 267e1de395ff6bf7275ba0e795d6a74e50989a1a Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Fri, 9 Sep 2022 14:23:37 +0800 Subject: [PATCH 59/77] sw64: remove useless enum Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: He Chuyue Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/ptrace.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index f3bc1020eaff..e98679d10fae 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -46,16 +46,6 @@ * zero have no stack-slot and need to be treated specially (see * get_reg/put_reg below). 
*/ -enum { - REG_R0 = 0, - REG_F0 = 32, - REG_FPCR = 63, - REG_PC = 64, - REG_SP = 30, - REG_PS = 31, - REG_GP = 29 -}; - #define R(x) ((size_t) &((struct pt_regs *)0)->x) short regoffsets[32] = { -- Gitee From 6dd8947dffd3097881e9aacf1d0e0f7881de03a4 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 16 Sep 2022 09:30:41 +0800 Subject: [PATCH 60/77] sw64: perf: add perf kvm support for guest os Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTJN -------------------------------- Signed-off-by: Chen Wang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/kvm_asm.h | 12 ++++++ arch/sw_64/include/asm/kvm_host.h | 3 ++ arch/sw_64/include/asm/perf_event.h | 8 ++++ arch/sw_64/kernel/perf_event.c | 32 +++++++++++++++ arch/sw_64/kvm/Makefile | 2 +- arch/sw_64/kvm/kvm-sw64.c | 24 +++++++++++ arch/sw_64/kvm/perf.c | 53 ++++++++++++++++++++++++ arch/sw_64/kvm/trace.h | 62 +++++++++++++++++++++++++++++ 8 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 arch/sw_64/kvm/perf.c create mode 100644 arch/sw_64/kvm/trace.h diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index 7e2c92ed4574..d408e90cee62 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -14,4 +14,16 @@ #ifdef CONFIG_KVM_MEMHOTPLUG #define SW64_KVM_EXIT_MEMHOTPLUG 23 #endif + +#define kvm_sw64_exception_type \ + {0, "HOST_INTR" }, \ + {1, "IO" }, \ + {10, "HALT" }, \ + {12, "SHUTDOWN" }, \ + {13, "TIMER" }, \ + {14, "IPI" }, \ + {17, "RESTART" }, \ + {22, "FATAL_ERROR" }, \ + {23, "MEMHOTPLUG" } + #endif /* _ASM_SW64_KVM_ASM_H */ diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 835ccef7490f..02d7131f0286 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -124,4 +124,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct 
kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +int kvm_sw64_perf_init(void); +int kvm_sw64_perf_teardown(void); + #endif /* _ASM_SW64_KVM_HOST_H */ diff --git a/arch/sw_64/include/asm/perf_event.h b/arch/sw_64/include/asm/perf_event.h index 5f5a45217544..4212342334d5 100644 --- a/arch/sw_64/include/asm/perf_event.h +++ b/arch/sw_64/include/asm/perf_event.h @@ -3,5 +3,13 @@ #define _ASM_SW64_PERF_EVENT_H #include +#include + +#ifdef CONFIG_PERF_EVENTS +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif #endif /* _ASM_SW64_PERF_EVENT_H */ diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index 4122502da895..f1f74a968cbc 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -760,6 +760,38 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, walk_stackframe(NULL, regs, callchain_trace, entry); } +/* + * Gets the perf_instruction_pointer and perf_misc_flags for guest os. + */ +#undef is_in_guest + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} + /* * Init call to initialise performance events at kernel startup. 
*/ diff --git a/arch/sw_64/kvm/Makefile b/arch/sw_64/kvm/Makefile index 48ae938faab7..43cea19215ff 100644 --- a/arch/sw_64/kvm/Makefile +++ b/arch/sw_64/kvm/Makefile @@ -8,6 +8,6 @@ KVM := ../../../virt/kvm ccflags-y += -Ivirt/kvm -Iarch/sw_64/kvm kvm-$(CONFIG_KVM_SW64_HOST) += $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_SW64_HOST) += kvm-sw64.o entry.o emulate.o mmio.o kvm_timer.o handle_exit.o +kvm-$(CONFIG_KVM_SW64_HOST) += kvm-sw64.o entry.o emulate.o mmio.o kvm_timer.o handle_exit.o perf.o obj-$(CONFIG_KVM_SW64_HOST) += kvm.o diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 825fe39f0494..6afff2257947 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -16,6 +16,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #include "../kernel/pci_impl.h" #include "vmem.c" @@ -34,6 +37,13 @@ extern bool bind_vcpu_enabled; #define HARDWARE_VPN_MASK ((1UL << WIDTH_HARDWARE_VPN) - 1) #define VPN_SHIFT (64 - WIDTH_HARDWARE_VPN) +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); + +static void kvm_set_running_vcpu(struct kvm_vcpu *vcpu) +{ + __this_cpu_write(kvm_running_vcpu, vcpu); +} + int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) { set_bit(number, (vcpu->arch.irqs_pending)); @@ -462,6 +472,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; + kvm_set_running_vcpu(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -472,6 +483,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) * optimized make_all_cpus_request path. 
*/ vcpu->cpu = -1; + kvm_set_running_vcpu(NULL); } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, @@ -561,6 +573,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->arch.vcb.upcr = 0x7; } +#ifdef CONFIG_PERF_EVENTS + vcpu_load(vcpu); +#endif if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -601,6 +616,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) guest_enter_irqoff(); /* Enter the guest */ + trace_kvm_sw64_entry(vcpu->vcpu_id, vcpu->arch.regs.pc); vcpu->mode = IN_GUEST_MODE; ret = __sw64_vcpu_run((struct vcpucb *)__phys_addr((unsigned long)vcb), &(vcpu->arch.regs), &hargs); @@ -610,6 +626,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_enable(); guest_exit_irqoff(); + + trace_kvm_sw64_exit(ret, vcpu->arch.regs.pc); + preempt_enable(); /* ret = 0 indicate interrupt in guest mode, ret > 0 indicate hcall */ @@ -619,6 +638,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); +#ifdef CONFIG_PERF_EVENTS + vcpu_put(vcpu); +#endif return ret; } @@ -667,11 +689,13 @@ long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) int kvm_arch_init(void *opaque) { + kvm_sw64_perf_init(); return 0; } void kvm_arch_exit(void) { + kvm_sw64_perf_teardown(); } void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) diff --git a/arch/sw_64/kvm/perf.c b/arch/sw_64/kvm/perf.c new file mode 100644 index 000000000000..8d90d79643de --- /dev/null +++ b/arch/sw_64/kvm/perf.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance events support for KVM. 
+ */ + +#include +#include + +#include + +static int kvm_is_in_guest(void) +{ + return kvm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return (vcpu->arch.regs.ps & 8) != 0; + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return vcpu->arch.regs.pc; + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_sw64_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_sw64_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/sw_64/kvm/trace.h b/arch/sw_64/kvm/trace.h new file mode 100644 index 000000000000..2611df3d3fa5 --- /dev/null +++ b/arch/sw_64/kvm/trace.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_SW64_KVM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _SW64_KVM_TRACE_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoint for guest mode entry. + */ +TRACE_EVENT(kvm_sw64_entry, + TP_PROTO(unsigned int vcpu_id, unsigned int vcpu_pc), + TP_ARGS(vcpu_id, vcpu_pc), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(unsigned int, vcpu_pc) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("VCPU %u: PC: 0x%08x", __entry->vcpu_id, __entry->vcpu_pc) +); + +/* + * Tracepoint for guest mode exit. 
+ */ + +TRACE_EVENT(kvm_sw64_exit, + TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), + TP_ARGS(exit_reason, vcpu_pc), + + TP_STRUCT__entry( + __field(unsigned int, exit_reason) + __field(unsigned long, vcpu_pc) + ), + + TP_fast_assign( + __entry->exit_reason = exit_reason; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("exit_reason: 0x%04x (%11s), PC: 0x%08lx", + __entry->exit_reason, + __print_symbolic(__entry->exit_reason, kvm_sw64_exception_type), + __entry->vcpu_pc) +); + +#endif /* _SW64_KVM_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include -- Gitee From 09a0f6e3b7f813bc96156dc4d0bcf9f4b17b7bad Mon Sep 17 00:00:00 2001 From: He Sheng Date: Fri, 16 Sep 2022 15:17:44 +0800 Subject: [PATCH 61/77] sw64: tools: add R_SW64_LITERAL_GOT support for relocs Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/tools/relocs.c | 1 + arch/sw_64/tools/relocs.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/sw_64/tools/relocs.c b/arch/sw_64/tools/relocs.c index a8a9e08a0a65..06bd4625bc6e 100644 --- a/arch/sw_64/tools/relocs.c +++ b/arch/sw_64/tools/relocs.c @@ -487,6 +487,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_SW64_SREL32: case R_SW64_GPRELHIGH: case R_SW64_GPRELLOW: + case R_SW64_LITERAL_GOT: /* * NONE can be ignored and PC relative relocations don't * need to be adjusted. 
diff --git a/arch/sw_64/tools/relocs.h b/arch/sw_64/tools/relocs.h index 37ac09ec2a77..17c7e31113a0 100644 --- a/arch/sw_64/tools/relocs.h +++ b/arch/sw_64/tools/relocs.h @@ -53,6 +53,7 @@ #define R_SW64_TPRELHI 39 #define R_SW64_TPRELLO 40 #define R_SW64_TPREL16 41 +#define R_SW64_LITERAL_GOT 43 /* GP relative */ void die(char *fmt, ...); -- Gitee From f51c48d834ec5115a47b424d562cedafe3b6100f Mon Sep 17 00:00:00 2001 From: He Sheng Date: Fri, 16 Sep 2022 15:19:21 +0800 Subject: [PATCH 62/77] sw64: remove unused sync_icache() and some debug codes Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/kernel/relocate.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/arch/sw_64/kernel/relocate.c b/arch/sw_64/kernel/relocate.c index fe403f9c70c7..792ee1a9c2b1 100644 --- a/arch/sw_64/kernel/relocate.c +++ b/arch/sw_64/kernel/relocate.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. + * Support for kernel relocation at boot time. 
* - * Support for Kernel relocation at boot time + * Based on arch/mips/kernel/relocate.c * * Copyright (C) 2019 He Sheng * Authors: He Sheng (hesheng05@gmail.com) @@ -15,7 +13,6 @@ #include -#define INITRD_ADDR 0x3000000UL #define KTEXT_MAX 0xffffffffa0000000UL #define RELOCATED(x) ((void *)((unsigned long)x + offset)) @@ -30,8 +27,6 @@ extern unsigned long __start___ex_table; /* Start exception table */ extern unsigned long __stop___ex_table; /* End exception table */ extern union thread_union init_thread_union; -extern void __weak plat_fdt_relocated(void *new_location); - /* * This function may be defined for a platform to perform any post-relocation * fixup necessary. @@ -42,13 +37,6 @@ int __weak plat_post_relocation(long offset) return 0; } - -static void __init sync_icache(void) -{ - // IC_FLUSH - imb(); -} - static int __init apply_r_sw64_refquad(unsigned long *loc_orig, unsigned long *loc_new, unsigned int offset) { *(unsigned long *)loc_new += offset; @@ -166,13 +154,14 @@ static unsigned long __init determine_relocation_offset(void) if (offset < kernel_length) offset += ALIGN(kernel_length, 0x10000); - /* TODO: 119MB is for test */ - offset = (119 << 20); + /* + * TODO:new location should not overlaps initrd, dtb, acpi + * tables, etc. 
+ */ + if ((KTEXT_MAX - (unsigned long)_end) < offset) offset = 0; - // TODO:new location should not overlaps initrd - return offset; } @@ -216,9 +205,7 @@ unsigned int __init relocate_kernel(void) bss_length = (unsigned long)&__bss_stop - (long)&__bss_start; offset = determine_relocation_offset(); - /* Reset the command line now so we don't end up with a duplicate */ - //arcs_cmdline[0] = '\0'; /* Sanity check relocation address */ if (offset && relocation_offset_valid(offset)) { @@ -232,9 +219,6 @@ unsigned int __init relocate_kernel(void) if (res < 0) goto out; - /* Sync the caches ready for execution of new kernel */ - sync_icache(); - res = relocate_got(offset); if (res < 0) goto out; @@ -259,7 +243,6 @@ unsigned int __init relocate_kernel(void) __current_thread_info = RELOCATED(&init_thread_union); /* Return the new kernel's offset */ - //printk("loc_new:%p, start_kernel: %p, gp:%p\n", loc_new, kernel_entry, kgp); return offset; } out: -- Gitee From 36b92f77301be29ab3ff2419ff489fdf95bc6081 Mon Sep 17 00:00:00 2001 From: Xu Chenjiao Date: Fri, 16 Sep 2022 09:34:33 +0800 Subject: [PATCH 63/77] sw64: add support for S3 sleep option Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTK7 -------------------------------- The S3 sleeping state is a low wake latency sleeping state where all system context is lost except system memory. This state will put memory device controller into self-refresh mode in which the memory device maintains its stored data without any active command from the memory controller. At present, only SW831 supports S3 sleep option and has been tested successfully on SW831 CRB. BTW, one should upgrade SROM, HMCode and BIOS firmwares to enable this function. 
Signed-off-by: Xu Chenjiao Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 6 ++ arch/sw_64/chip/chip3/chip.c | 167 +++++++++++++++++++++++++++++ arch/sw_64/include/asm/chip3_io.h | 3 + arch/sw_64/include/asm/pci.h | 7 ++ arch/sw_64/include/asm/sw64_init.h | 2 +- arch/sw_64/kernel/smp.c | 1 + arch/sw_64/kernel/suspend.c | 13 +++ 7 files changed, 198 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 17b48c7e5fc1..b37a4f4e093e 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -876,6 +876,12 @@ config SW64_SUSPEND_DEEPSLEEP_BOOTCORE bool "SW64 bootcore suspend into deep sleep mode" default n +config SW64_SUPPORT_S3_SLEEPING_STATE + depends on SUSPEND + bool "SW64 support S3 sleeping state" + default n + help + Only SW831 support S3 sleep option and needs SROM, HMCode and BIOS support. source "drivers/cpuidle/Kconfig" diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 105389d5989f..d0b1c1c1c6df 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -493,6 +493,172 @@ static void chip3_device_interrupt(unsigned long irq_info) } } +static void chip3_i2c_srst(void) +{ + sw64_io_write(0, I2C0_SRST_L, 0x0); + sw64_io_write(0, I2C0_SRST_L, 0x1); + + sw64_io_write(0, I2C1_SRST_L, 0x0); + sw64_io_write(0, I2C1_SRST_L, 0x1); + + sw64_io_write(0, I2C2_SRST_L, 0x0); + sw64_io_write(0, I2C2_SRST_L, 0x1); +} + +static void chip3_pcie_save(void) +{ + struct pci_controller *hose; + struct piu_saved *piu_save; + unsigned long node, index; + unsigned long i; + + for (hose = hose_head; hose; hose = hose->next) { + piu_save = kzalloc(sizeof(*piu_save), GFP_KERNEL); + + node = hose->node; + index = hose->index; + hose->sysdata = piu_save; + + piu_save->piuconfig0 = read_piu_ior0(node, index, PIUCONFIG0); + piu_save->piuconfig1 = read_piu_ior1(node, index, PIUCONFIG1); + piu_save->epdmabar = read_piu_ior0(node, index, EPDMABAR); + piu_save->msiaddr = 
read_piu_ior0(node, index, MSIADDR); + + for (i = 0; i < 256; i++) { + piu_save->msiconfig[i] = read_piu_ior0(node, index, + MSICONFIG0 + (i << 7)); + } + } +} + +static void chip3_pcie_restore(void) +{ + struct pci_controller *hose; + struct piu_saved *piu_save; + unsigned long node, index; + u32 rc_misc_ctrl; + unsigned int value; + unsigned long i; + + for (hose = hose_head; hose; hose = hose->next) { + node = hose->node; + index = hose->index; + piu_save = hose->sysdata; + + write_piu_ior0(node, index, PIUCONFIG0, piu_save->piuconfig0); + write_piu_ior1(node, index, PIUCONFIG1, piu_save->piuconfig1); + write_piu_ior0(node, index, EPDMABAR, piu_save->epdmabar); + write_piu_ior0(node, index, MSIADDR, piu_save->msiaddr); + + for (i = 0; i < 256; i++) { + write_piu_ior0(node, index, MSICONFIG0 + (i << 7), + piu_save->msiconfig[i]); + } + + /* Enable DBI_RO_WR_EN */ + rc_misc_ctrl = read_rc_conf(node, index, RC_MISC_CONTROL_1); + write_rc_conf(node, index, RC_MISC_CONTROL_1, rc_misc_ctrl | 0x1); + + /* Fix up DEVICE_ID_VENDOR_ID register */ + value = (PCI_DEVICE_ID_CHIP3 << 16) | PCI_VENDOR_ID_JN; + write_rc_conf(node, index, RC_VENDOR_ID, value); + + /* Set PCI-E root class code */ + value = read_rc_conf(node, index, RC_REVISION_ID); + write_rc_conf(node, index, RC_REVISION_ID, (PCI_CLASS_BRIDGE_HOST << 16) | value); + + /* Disable DBI_RO_WR_EN */ + write_rc_conf(node, index, RC_MISC_CONTROL_1, rc_misc_ctrl); + } + +} + +static unsigned long saved_dvc_int, saved_long_time; + +static inline void chip3_intpu_save(void) +{ + saved_long_time = sw64_io_read(0, LONG_TIME); +} + +static inline void chip3_intpu_restore(void) +{ + switch (cpu_desc.model) { + case CPU_SW831: + sw64_io_write(0, LONG_TIME, saved_long_time); + sw64_io_write(0, LONG_TIME_START_EN, 0x1); + break; + default: + pr_info("long time start is disable!"); + break; + } +} + +static inline void chip3_spbu_save(void) +{ + saved_dvc_int = sw64_io_read(0, MCU_DVC_INT_EN); +} + +static inline void 
chip3_spbu_restore(void) +{ + chip3_i2c_srst(); + sw64_io_write(0, MCU_DVC_INT_EN, saved_dvc_int); +} + +#define BIOS_SECBIN 0x2F00000UL +#define BIOS_SECSIZE 0x40000UL +#define BOUNCE_BUFFER ((1UL<<32) - BIOS_SECSIZE) +#define BIOS_MEMSAVE ((1UL<<32) - 2 * BIOS_SECSIZE) + +/* + * Due to specific architecture PCI MEM32 addressing, we reserve 512M memory + * size at PCI_32BIT_MEMIO (0xE000_0000) on SW64 platform. + * + * Since this memory region is still usable by OS, we implement a interface + * contract between BIOS and kernel: + * + * Firstly BIOS should back up SEC relative code segment to BIOS_MEMSAVE region + * with the length BIOS_SECSIZE in order to restore BIOS SEC phase binary during + * S3 sleep. + * + * Secondly kernel should use a bounce buffer to save memory region which may be + * overwritten by BIOS on resume from S3 sleep. + */ +static void chip3_mem_restore(void) +{ + void *dst, *src; + unsigned long size = BIOS_SECSIZE; + + /* Firstly kernel back up to a bounce buffer */ + src = __va(BIOS_SECBIN); + dst = __va(BOUNCE_BUFFER); + memcpy(dst, src, size); + + /* Secondly restore BIOS SEC phase binary */ + src = __va(BIOS_MEMSAVE); + dst = __va(BIOS_SECBIN); + memcpy(dst, src, size); +} + +extern void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data); + +static void chip3_suspend(bool wakeup) +{ + + if (wakeup) { + chip3_pcie_restore(); + chip3_intpu_restore(); + chip3_spbu_restore(); + } else { + /* Set S3 flag */ + cpld_write(0x64, 0x34, 0x33); + + chip3_spbu_save(); + chip3_intpu_save(); + chip3_pcie_save(); + chip3_mem_restore(); + } +} + static void chip3_hose_init(struct pci_controller *hose) { unsigned long pci_io_base; @@ -574,6 +740,7 @@ static struct sw64_chip_init_ops chip3_chip_init_ops = { static struct sw64_chip_ops chip3_chip_ops = { .get_cpu_num = chip3_get_cpu_nums, + .suspend = chip3_suspend, .fixup = chip3_ops_fixup, }; diff --git a/arch/sw_64/include/asm/chip3_io.h b/arch/sw_64/include/asm/chip3_io.h index 
3bfbc2bdafe7..18e79cf2a36b 100644 --- a/arch/sw_64/include/asm/chip3_io.h +++ b/arch/sw_64/include/asm/chip3_io.h @@ -165,6 +165,9 @@ enum { MC_CAP_CFG = MCU_BASE | 0x1180UL, IO_START = MCU_BASE | 0x1300UL, UART_ONLINE = MCU_BASE | 0x1780UL, + I2C0_SRST_L = MCU_BASE | 0x1900UL, + I2C1_SRST_L = MCU_BASE | 0x1980UL, + I2C2_SRST_L = MCU_BASE | 0x1a00UL, MCU_DVC_INT = MCU_BASE | 0x3000UL, MCU_DVC_INT_EN = MCU_BASE | 0x3080UL, SI_FAULT_STAT = MCU_BASE | 0x3100UL, diff --git a/arch/sw_64/include/asm/pci.h b/arch/sw_64/include/asm/pci.h index a90f80152470..ab79d503b84d 100644 --- a/arch/sw_64/include/asm/pci.h +++ b/arch/sw_64/include/asm/pci.h @@ -18,6 +18,13 @@ struct resource; struct sunway_iommu; struct page; +struct piu_saved { + unsigned long piuconfig0; + unsigned long piuconfig1; + unsigned long epdmabar; + unsigned long msiaddr; + unsigned long msiconfig[256]; +}; /* A controller. Used to manage multiple PCI busses. */ diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h index aae82f4163e0..893bac1c621b 100644 --- a/arch/sw_64/include/asm/sw64_init.h +++ b/arch/sw_64/include/asm/sw64_init.h @@ -32,7 +32,7 @@ struct sw64_chip_init_ops { struct sw64_chip_ops { int (*get_cpu_num)(void); void (*device_interrupt)(unsigned long irq_info); - void (*suspend)(int wake); + void (*suspend)(bool wake); void (*fixup)(void); }; diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index c0936d119c4e..b467562bce9e 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -616,6 +616,7 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); + smp_rcb->ready = 0; return; } msleep(100); diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c index 994d8e245878..b9798baa2467 100644 --- a/arch/sw_64/kernel/suspend.c +++ b/arch/sw_64/kernel/suspend.c @@ -23,6 +23,8 @@ void disable_local_timer(void) wrtimer(0); } 
+extern struct pci_controller *hose_head; + /* * Boot Core will enter suspend stat here. */ @@ -32,6 +34,11 @@ void sw64_suspend_enter(void) * After wake up boot processor, pc will go here */ +#ifdef CONFIG_SW64_SUPPORT_S3_SLEEPING_STATE + if (sw64_chip->suspend) + sw64_chip->suspend(false); +#endif + disable_local_timer(); current_thread_info()->pcb.tp = rtid(); @@ -43,6 +50,11 @@ void sw64_suspend_enter(void) #endif wrtp(current_thread_info()->pcb.tp); +#ifdef CONFIG_SW64_SUPPORT_S3_SLEEPING_STATE + if (sw64_chip->suspend) + sw64_chip->suspend(true); +#endif + disable_local_timer(); } @@ -57,6 +69,7 @@ static const struct platform_suspend_ops native_suspend_ops = { .valid = native_suspend_state_valid, .enter = native_suspend_enter, }; + static int __init sw64_pm_init(void) { suspend_set_ops(&native_suspend_ops); -- Gitee From c1c6a2733267adad3a7bec645a6ce51162211db4 Mon Sep 17 00:00:00 2001 From: Min Fanlei Date: Tue, 26 Jul 2022 09:51:52 +0000 Subject: [PATCH 64/77] sw64: kvm: add guest live migration support Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTKO -------------------------------- This patch adds live migration support for guest os. It requires hmcode of host and guest to be upgraded to activate this feature. 
Signed-off-by: Min Fanlei Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/kvm_asm.h | 2 + arch/sw_64/include/asm/vcpu.h | 10 ++++ arch/sw_64/kvm/Kconfig | 1 + arch/sw_64/kvm/handle_exit.c | 9 ++++ arch/sw_64/kvm/kvm-sw64.c | 85 +++++++++++++++++++++++++++++++- 5 files changed, 105 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index d408e90cee62..841bfa1dd0aa 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -4,6 +4,8 @@ #define SW64_KVM_EXIT_HOST_INTR 0 #define SW64_KVM_EXIT_IO 1 +#define SW64_KVM_MIGRATION_SET_DIRTY 2 +#define SW64_KVM_MIGRATION_SET_DIRTY_HM 3 #define SW64_KVM_EXIT_HALT 10 #define SW64_KVM_EXIT_SHUTDOWN 12 #define SW64_KVM_EXIT_TIMER 13 diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index dfefb9dc8651..c43ebe72e3a1 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -41,6 +41,16 @@ struct vcpucb { unsigned long exit_reason; unsigned long ipaddr; unsigned long vcpu_irq_vector; + unsigned long pri_base; + unsigned long stack_pc_dfault; + unsigned long guest_p20; + unsigned long guest_dfault_double; + unsigned long guest_irqs_pending; + unsigned long guest_hm_r30; + unsigned long migration_mark; + unsigned long guest_longtime; + unsigned long guest_longtime_offset; + unsigned long reserved[3]; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig index 4b6201ff5dc8..8077ea452765 100644 --- a/arch/sw_64/kvm/Kconfig +++ b/arch/sw_64/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select KVM_VFIO select TUN select GENERIC_ALLOCATOR + select KVM_GENERIC_DIRTYLOG_READ_PROTECT help Support for hosting Guest kernels. We don't support KVM with 3-level page tables yet. 
diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 5016bc0eddc2..52f40a4c5803 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -13,9 +13,18 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index, struct hcall_args *hargs) { + gfn_t gfn; + switch (exception_index) { case SW64_KVM_EXIT_IO: return io_mem_abort(vcpu, run, hargs); + case SW64_KVM_MIGRATION_SET_DIRTY_HM: + case SW64_KVM_MIGRATION_SET_DIRTY: + gfn = hargs->arg2 >> 24; + mutex_lock(&vcpu->kvm->slots_lock); + kvm_vcpu_mark_page_dirty(vcpu, gfn); + mutex_unlock(&vcpu->kvm->slots_lock); + return 1; case SW64_KVM_EXIT_HALT: vcpu->arch.halted = 1; kvm_vcpu_block(vcpu); diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 6afff2257947..ffcfdee58a48 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -133,6 +133,19 @@ static void sw64_kvm_switch_vpn(struct kvm_vcpu *vcpu) } } +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + unsigned long vpn; + long cpu = smp_processor_id(); + + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + vpn = vcpu->arch.vpnc[cpu] & HARDWARE_VPN_MASK; + tbivpn(0, 0, vpn); + } + } +} + struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; @@ -177,12 +190,47 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } +/* + * kvm_mark_migration write the mark on every vcpucbs of the kvm, which tells + * the system to do migration while the mark is on, and flush all vcpu's tlbs + * at the beginning of the migration. 
+ */ +void kvm_mark_migration(struct kvm *kvm, int mark) +{ + struct kvm_vcpu *vcpu; + int cpu; + + kvm_for_each_vcpu(cpu, vcpu, kvm) + vcpu->arch.vcb.migration_mark = mark << 2; + + kvm_flush_remote_tlbs(kvm); +} + void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be be tracked while the + * memory slot is write protected. + */ + + /* If dirty logging has been stopped, do nothing for now. */ + if ((change != KVM_MR_DELETE) + && (old->flags & KVM_MEM_LOG_DIRTY_PAGES) + && (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))) { + kvm_mark_migration(kvm, 0); + return; + } + + /* If it's the first time dirty logging, flush all vcpu tlbs. */ + if ((change == KVM_MR_FLAGS_ONLY) + && (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES)) + && (new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mark_migration(kvm, 1); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -193,6 +241,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IRQCHIP: case KVM_CAP_IOEVENTFD: case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -206,9 +255,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn_offset, + unsigned long mask) { - return 0; } int kvm_sw64_pending_timer(struct kvm_vcpu *vcpu) @@ -547,6 +597,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) bool more; sigset_t sigsaved; + if (run->immediate_exit) + return -EINTR; + /* Set guest vcb */ /* vpn will update later when vcpu is running */ if (vcpu->arch.vcb.vpcr == 0) { @@ -613,6 +666,7 @@ int kvm_arch_vcpu_ioctl_run(struct 
kvm_vcpu *vcpu) vcpu->arch.halted = 0; sw64_kvm_switch_vpn(vcpu); + check_vcpu_requests(vcpu); guest_enter_irqoff(); /* Enter the guest */ @@ -647,6 +701,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + unsigned long result; struct kvm_vcpu *vcpu = filp->private_data; struct vcpucb *kvm_vcb; @@ -654,12 +709,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SW64_VCPU_INIT: return kvm_arch_vcpu_reset(vcpu); case KVM_SW64_GET_VCB: + if (vcpu->arch.vcb.migration_mark) { + result = sw64_io_read(0, LONG_TIME); + vcpu->arch.vcb.guest_longtime = result; + vcpu->arch.vcb.guest_irqs_pending = vcpu->arch.irqs_pending[0]; + } + if (copy_to_user((void __user *)arg, &(vcpu->arch.vcb), sizeof(struct vcpucb))) return -EINVAL; break; case KVM_SW64_SET_VCB: kvm_vcb = memdup_user((void __user *)arg, sizeof(*kvm_vcb)); memcpy(&(vcpu->arch.vcb), kvm_vcb, sizeof(struct vcpucb)); + + if (vcpu->arch.vcb.migration_mark) { + /* updated vpcr needed by destination vm */ + vcpu->arch.vcb.vpcr + = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); + + result = sw64_io_read(0, LONG_TIME); + + /* synchronize the longtime of source and destination */ + vcpu->arch.vcb.guest_longtime_offset = vcpu->arch.vcb.guest_longtime - result; + + set_timer(vcpu, 200000000); + vcpu->arch.vcb.migration_mark = 0; + } break; default: return -EINVAL; @@ -702,6 +777,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + kvm_flush_remote_tlbs(kvm); +} + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { return 0; -- Gitee From 4a20297e6ca491acadcd2928b021c557d4db957a Mon Sep 17 00:00:00 2001 From: He Sheng Date: Fri, 23 Sep 2022 10:58:14 +0800 Subject: [PATCH 65/77] sw64: invoke hmcall with HMC_* macros Sunway inclusion category: feature bugzilla: 
https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- It's better to use HMC_* macro instead of numeric constant. This patch also adds __CALL_HMC_VOID to define hmcalls with no return value including sflush(). Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 24 ++++++++++++------------ arch/sw_64/include/asm/kgdb.h | 2 +- arch/sw_64/kernel/vdso/vgettimeofday.c | 9 ++++----- arch/sw_64/kernel/vdso/vrt_sigreturn.S | 3 ++- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 04fcafac9e80..71d203efc587 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -55,16 +55,14 @@ extern void __init fixup_hmcall(void); extern void halt(void) __attribute__((noreturn)); -#define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) -#define fpu_enable() \ +#define __CALL_HMC_VOID(NAME) \ +static inline void NAME(void) \ { \ - __asm__ __volatile__("sys_call %0" : : "i" (HMC_wrfen));\ + __asm__ __volatile__( \ + "sys_call %0 ": : "i" (HMC_ ## NAME)); \ } -#define imb() \ - __asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory") - #define __CALL_HMC_R0(NAME, TYPE) \ static inline TYPE NAME(void) \ { \ @@ -142,10 +140,14 @@ static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1, TYPE2 arg2) \ return __r0; \ } -#define sflush() \ -{ \ - __asm__ __volatile__("sys_call 0x2f"); \ -} + +__CALL_HMC_VOID(imb); +__CALL_HMC_VOID(sflush); +__CALL_HMC_VOID(wrfen); +#define fpu_enable() wrfen() + +__CALL_HMC_VOID(sleepen); +__CALL_HMC_VOID(mtinten); __CALL_HMC_R0(rdps, unsigned long); @@ -164,8 +166,6 @@ __CALL_HMC_RW1(swpipl, unsigned long, unsigned long); __CALL_HMC_R0(whami, unsigned long); __CALL_HMC_RW1(rdio64, unsigned long, unsigned long); __CALL_HMC_RW1(rdio32, unsigned int, unsigned long); -__CALL_HMC_R0(sleepen, unsigned long); -__CALL_HMC_R0(mtinten, unsigned
long); __CALL_HMC_W2(wrent, void*, unsigned long); __CALL_HMC_W2(tbisasn, unsigned long, unsigned long); __CALL_HMC_W1(wrkgp, unsigned long); diff --git a/arch/sw_64/include/asm/kgdb.h b/arch/sw_64/include/asm/kgdb.h index 1d807362e867..6478c7a989c3 100644 --- a/arch/sw_64/include/asm/kgdb.h +++ b/arch/sw_64/include/asm/kgdb.h @@ -34,7 +34,7 @@ static inline void arch_kgdb_breakpoint(void) { - asm __volatile__ ("sys_call/b 0x80"); + asm __volatile__ ("sys_call %0" : : "i"(HMC_bpt) ); } void sw64_task_to_gdb_regs(struct task_struct *task, unsigned long *regs); diff --git a/arch/sw_64/kernel/vdso/vgettimeofday.c b/arch/sw_64/kernel/vdso/vgettimeofday.c index b9c9a137f9d3..49bb4e2e66ed 100644 --- a/arch/sw_64/kernel/vdso/vgettimeofday.c +++ b/arch/sw_64/kernel/vdso/vgettimeofday.c @@ -16,6 +16,7 @@ #include #include +#include static __always_inline int syscall_fallback(clockid_t clkid, struct timespec64 *ts) { @@ -25,8 +26,8 @@ static __always_inline int syscall_fallback(clockid_t clkid, struct timespec64 * " mov %0, $16\n" " mov %1, $17\n" " ldi $0, %2\n" - " sys_call 0x83\n" - :: "r"(clkid), "r"(ts), "i"(__NR_clock_gettime) + " sys_call %3\n" + :: "r"(clkid), "r"(ts), "i"(__NR_clock_gettime), "i"(HMC_callsys) : "$0", "$16", "$17", "$19"); if (unlikely(r19)) return -r0; @@ -78,9 +79,7 @@ static __always_inline u64 read_longtime(void) register unsigned long __r0 __asm__("$0"); __asm__ __volatile__( - "sys_call 0xB1" - : "=r"(__r0) - ::"memory"); + "sys_call %1" : "=r"(__r0) : "i" (HMC_longtime)); return __r0; } diff --git a/arch/sw_64/kernel/vdso/vrt_sigreturn.S b/arch/sw_64/kernel/vdso/vrt_sigreturn.S index 6aa7aa300b4d..d2d7295ffa7a 100644 --- a/arch/sw_64/kernel/vdso/vrt_sigreturn.S +++ b/arch/sw_64/kernel/vdso/vrt_sigreturn.S @@ -19,6 +19,7 @@ #include #include +#include #define RT_SIGFRAME_SIZE 1600 #define RT_SIGFRAME_MCTX 176 @@ -64,6 +65,6 @@ ENTRY(__vdso_rt_sigreturn) mov $sp, $16 ldi $0, __NR_rt_sigreturn - sys_call 0x83 + sys_call HMC_callsys 
ENDPROC(__vdso_rt_sigreturn) .cfi_endproc -- Gitee From 9d4ad498e4316dc84ee770058b1c66a217759c60 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Sun, 9 Oct 2022 09:18:48 +0800 Subject: [PATCH 66/77] sw64: print real address of sp in show_regs() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- In show_regs(), we really want to print the address of stack pointer for debugging. Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/kernel/traps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index f01b88e53ff2..5fac85c29bf6 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -69,7 +69,8 @@ void show_regs(struct pt_regs *regs) regs->r22, regs->r23, regs->r24); printk("t11= %016lx pv = %016lx at = %016lx\n", regs->r25, regs->r27, regs->r28); - printk("gp = %016lx sp = %p\n", regs->gp, regs+1); + printk("gp = %016lx sp = %px\n", regs->gp, + user_mode(regs) ? (void *)rdusp() : (regs + 1)); } static void show_code(unsigned int *pc) -- Gitee From 9bbebd01321134e4806969549a5b8648a019e46b Mon Sep 17 00:00:00 2001 From: Xu Chenjiao Date: Sun, 9 Oct 2022 15:14:15 +0800 Subject: [PATCH 67/77] sw64: update openeuler_defconfig Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- Since suspend/resume is supported, let's enable this by default, and add some new configs brought by kernel updates. 
Signed-off-by: Xu Chenjiao Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/configs/openeuler_defconfig | 212 +++++++++++++------------ 1 file changed, 109 insertions(+), 103 deletions(-) diff --git a/arch/sw_64/configs/openeuler_defconfig b/arch/sw_64/configs/openeuler_defconfig index 0e77721dae36..e4cc4741e291 100644 --- a/arch/sw_64/configs/openeuler_defconfig +++ b/arch/sw_64/configs/openeuler_defconfig @@ -41,6 +41,7 @@ CONFIG_GENERIC_IRQ_LEGACY=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_MIGRATION=y +CONFIG_HARDIRQS_SW_RESEND=y CONFIG_GENERIC_IRQ_CHIP=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y @@ -99,13 +100,6 @@ CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 # # Scheduler features # - -# -# Intelligent aware scheduler -# -# CONFIG_IAS_SMART_IDLE is not set -# CONFIG_IAS_SMART_LOAD_TRACKING is not set -# end of Intelligent aware scheduler # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y @@ -116,9 +110,11 @@ CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y +# CONFIG_MEMCG_MEMFS_INFO is not set CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y +# CONFIG_SCHED_PRIO_LB is not set CONFIG_FAIR_GROUP_SCHED=y # CONFIG_CFS_BANDWIDTH is not set # CONFIG_RT_GROUP_SCHED is not set @@ -214,6 +210,8 @@ CONFIG_SLAB_MERGE_DEFAULT=y # CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_PROFILING is not set +CONFIG_KABI_RESERVE=y +CONFIG_KABI_SIZE_ALIGN_CHECKS=y # end of General setup CONFIG_SW64=y @@ -225,6 +223,7 @@ CONFIG_SYS_SUPPORTS_HUGETLBFS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ZONE_DMA32=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_NEED_SG_DMA_LENGTH=y @@ -249,14 +248,19 @@ CONFIG_SW64_ASIC=y # CONFIG_SW64_CHIP3_ASIC_DEBUG is not set CONFIG_CPUFREQ_DEBUGFS=y CONFIG_PLATFORM_XUELANG=y +# CONFIG_MIGHT_HAVE_PC_SERIO is not set # end of Machine 
Configuration # CONFIG_LOCK_MEMB is not set -# CONFIG_DIRECT_DMA is not set -CONFIG_SWIOTLB=y + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set +# end of CPU Frequency scaling + CONFIG_ISA=y CONFIG_ISA_DMA_API=y -CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_SYSCALL=y CONFIG_IOMMU_HELPER=y @@ -273,77 +277,11 @@ CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_NR_CPUS=64 CONFIG_HOTPLUG_CPU=y CONFIG_ARCH_SPARSEMEM_ENABLE=y -# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_NUMA=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_NODES_SHIFT=7 # CONFIG_RELOCATABLE is not set CONFIG_HZ=100 -# CONFIG_PCIEPORTBUS is not set -CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -# CONFIG_PCIE_PTM is not set -CONFIG_PCI_MSI=y -CONFIG_PCI_MSI_IRQ_DOMAIN=y -CONFIG_PCI_MSI_ARCH_FALLBACKS=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set -# CONFIG_PCI_STUB is not set -# CONFIG_PCI_PF_STUB is not set -CONFIG_PCI_ATS=y -CONFIG_PCI_IOV=y -# CONFIG_PCI_PRI is not set -# CONFIG_PCI_PASID is not set -CONFIG_PCI_LABEL=y -# CONFIG_PCIE_BUS_TUNE_OFF is not set -CONFIG_PCIE_BUS_DEFAULT=y -# CONFIG_PCIE_BUS_SAFE is not set -# CONFIG_PCIE_BUS_PERFORMANCE is not set -# CONFIG_PCIE_BUS_PEER2PEER is not set -# CONFIG_HOTPLUG_PCI is not set - -# -# PCI controller drivers -# -# CONFIG_PCI_FTPCI100 is not set -# CONFIG_PCI_HOST_GENERIC is not set -# CONFIG_PCIE_XILINX is not set - -# -# DesignWare PCI Core Support -# -# CONFIG_PCIE_DW_PLAT_HOST is not set -# CONFIG_PCI_MESON is not set -# end of DesignWare PCI Core Support - -# -# Mobiveil PCIe Core Support -# -# end of Mobiveil PCIe Core Support - -# -# Cadence PCIe controllers support -# -# CONFIG_PCIE_CADENCE_PLAT_HOST is not set -# CONFIG_PCI_J721E_HOST is not set -# end of Cadence PCIe controllers support -# end of PCI controller drivers - -# -# PCI Endpoint -# -# end of PCI 
Endpoint - -# -# PCI switch controller drivers -# -# CONFIG_PCI_SW_SWITCHTEC is not set -# end of PCI switch controller drivers - # CONFIG_PCCARD is not set # @@ -353,8 +291,6 @@ CONFIG_BINFMT_ELF=y CONFIG_ELFCORE=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_SCRIPT=y -CONFIG_HAVE_AOUT=y -# CONFIG_BINFMT_AOUT is not set # CONFIG_BINFMT_MISC is not set CONFIG_COREDUMP=y # end of Executable file formats @@ -373,7 +309,6 @@ CONFIG_DEEP_MEMSET=y # # Boot options # -CONFIG_SW64_IRQ_CHIP=y CONFIG_USE_OF=y # CONFIG_SW64_BUILTIN_DTB is not set CONFIG_EFI=y @@ -413,9 +348,18 @@ CONFIG_EFI_RUNTIME_WRAPPERS=y # # Power management options # -# CONFIG_SUSPEND is not set +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_SUSPEND_SKIP_SYNC is not set # CONFIG_HIBERNATION is not set -# CONFIG_PM is not set +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +# CONFIG_PM_AUTOSLEEP is not set +# CONFIG_PM_WAKELOCKS is not set +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_CLK=y +# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y # CONFIG_ACPI_DEBUGGER is not set @@ -424,6 +368,7 @@ CONFIG_ACPI=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_TAD=y # CONFIG_ACPI_DOCK is not set CONFIG_ACPI_CUSTOM_DSDT_FILE="" # CONFIG_ACPI_DEBUG is not set @@ -436,6 +381,9 @@ CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y # CONFIG_PMIC_OPREGION is not set CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_SW64_SUSPEND_DEEPSLEEP_NONBOOT_CORE=y +CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE=y +# CONFIG_SW64_SUPPORT_S3_SLEEPING_STATE is not set # # CPU Idle @@ -444,13 +392,13 @@ CONFIG_ARCH_HIBERNATION_POSSIBLE=y # end of CPU Idle # end of Power management options -CONFIG_DUMMY_CONSOLE=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQFD=y CONFIG_HAVE_KVM_IRQ_ROUTING=y CONFIG_HAVE_KVM_EVENTFD=y CONFIG_HAVE_KVM_MSI=y CONFIG_KVM_VFIO=y +CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y 
CONFIG_KVM_SW64_HOST=y @@ -473,8 +421,9 @@ CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_64BIT_ALIGNED_ACCESS=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_HAVE_ASM_MODVERSIONS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y @@ -491,7 +440,6 @@ CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_ISA_BUS_API=y CONFIG_OLD_SIGSUSPEND=y -CONFIG_OLD_SIGACTION=y # CONFIG_COMPAT_32BIT_TIME is not set CONFIG_ARCH_NO_PREEMPT=y CONFIG_ARCH_HAS_PHYS_TO_DMA=y @@ -511,7 +459,6 @@ CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y -CONFIG_ASM_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set # CONFIG_MODULE_SIG is not set # CONFIG_MODULE_COMPRESS is not set @@ -564,6 +511,7 @@ CONFIG_EFI_PARTITION=y CONFIG_BLK_MQ_PCI=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_MQ_RDMA=y +CONFIG_BLK_PM=y # # IO Schedulers @@ -665,7 +613,7 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set # CONFIG_CLEANCACHE is not set # CONFIG_FRONTSWAP is not set -# CONFIG_SHRINK_PAGECACHE is not set +# CONFIG_PAGE_CACHE_LIMIT is not set CONFIG_CMA=y # CONFIG_CMA_DEBUG is not set # CONFIG_CMA_DEBUGFS is not set @@ -679,6 +627,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_PERCPU_STATS is not set # CONFIG_GUP_BENCHMARK is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_HAS_PTE_SPECIAL=y # # Data Access Monitoring @@ -775,7 +724,6 @@ CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_TCP_COMP is not set CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -1318,14 +1266,48 @@ CONFIG_HAVE_EBPF_JIT=y # # Device Drivers # +CONFIG_HAVE_PCI=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCIEAER=y +# CONFIG_PCIEAER_INJECT is not set +# CONFIG_PCIE_ECRC is not set +# CONFIG_PCIEASPM is not set 
+CONFIG_PCIE_PME=y +# CONFIG_PCIE_DPC is not set +# CONFIG_PCIE_PTM is not set +CONFIG_PCI_MSI=y +CONFIG_PCI_MSI_IRQ_DOMAIN=y +CONFIG_PCI_MSI_ARCH_FALLBACKS=y +CONFIG_PCI_QUIRKS=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_PF_STUB is not set +CONFIG_PCI_ATS=y +CONFIG_PCI_IOV=y +# CONFIG_PCI_PRI is not set +# CONFIG_PCI_PASID is not set +CONFIG_PCI_LABEL=y +# CONFIG_PCIE_BUS_TUNE_OFF is not set +CONFIG_PCIE_BUS_DEFAULT=y +# CONFIG_PCIE_BUS_SAFE is not set +# CONFIG_PCIE_BUS_PERFORMANCE is not set +# CONFIG_PCIE_BUS_PEER2PEER is not set +# CONFIG_HOTPLUG_PCI is not set # # PCI controller drivers # +# CONFIG_PCI_FTPCI100 is not set +# CONFIG_PCI_HOST_GENERIC is not set +# CONFIG_PCIE_XILINX is not set # # DesignWare PCI Core Support # +# CONFIG_PCIE_DW_PLAT_HOST is not set +# CONFIG_PCI_MESON is not set # end of DesignWare PCI Core Support # @@ -1336,17 +1318,21 @@ CONFIG_HAVE_EBPF_JIT=y # # Cadence PCIe controllers support # +# CONFIG_PCIE_CADENCE_PLAT_HOST is not set +# CONFIG_PCI_J721E_HOST is not set # end of Cadence PCIe controllers support # end of PCI controller drivers # # PCI Endpoint # +# CONFIG_PCI_ENDPOINT is not set # end of PCI Endpoint # # PCI switch controller drivers # +# CONFIG_PCI_SW_SWITCHTEC is not set # end of PCI switch controller drivers # CONFIG_RAPIDIO is not set @@ -1368,6 +1354,7 @@ CONFIG_FW_LOADER=y CONFIG_EXTRA_FIRMWARE="" # CONFIG_FW_LOADER_USER_HELPER is not set # CONFIG_FW_LOADER_COMPRESS is not set +CONFIG_FW_CACHE=y # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y @@ -1375,6 +1362,8 @@ CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set # CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set # end of Generic Driver Options @@ -1383,6 +1372,7 @@ CONFIG_DMA_SHARED_BUFFER=y # Bus devices # # CONFIG_MOXTET is not set 
+# CONFIG_SIMPLE_PM_BUS is not set # CONFIG_MHI_BUS is not set # end of Bus devices @@ -1725,6 +1715,7 @@ CONFIG_PATA_TIMINGS=y CONFIG_ATA_VERBOSE_ERROR=y CONFIG_ATA_FORCE=y CONFIG_ATA_ACPI=y +# CONFIG_SATA_ZPODD is not set CONFIG_SATA_PMP=y # @@ -1988,7 +1979,6 @@ CONFIG_NET_VENDOR_QLOGIC=y # CONFIG_NETXEN_NIC is not set # CONFIG_QED is not set # CONFIG_NET_VENDOR_QUALCOMM is not set -CONFIG_NET_VENDOR_RAMAXEL=y CONFIG_NET_VENDOR_RDC=y # CONFIG_R6040 is not set CONFIG_NET_VENDOR_REALTEK=y @@ -2122,7 +2112,6 @@ CONFIG_USB_NET_DRIVERS=y # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set -# CONFIG_NVM is not set # # Input device support @@ -2182,8 +2171,6 @@ CONFIG_INPUT_KEYBOARD=y # Hardware I/O ports # CONFIG_SERIO=y -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -# CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -2204,6 +2191,7 @@ CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y +CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y @@ -2269,7 +2257,7 @@ CONFIG_VIRTIO_CONSOLE=y CONFIG_DEVMEM=y # CONFIG_DEVKMEM is not set # CONFIG_RAW_DRIVER is not set -CONFIG_DEVPORT=y +# CONFIG_DEVPORT is not set # CONFIG_TCG_TPM is not set # CONFIG_XILLYBUS is not set # end of Character devices @@ -2464,6 +2452,7 @@ CONFIG_HWMON=y # # Native drivers # +CONFIG_SENSORS_PVT=y # CONFIG_SENSORS_AD7314 is not set # CONFIG_SENSORS_AD7414 is not set # CONFIG_SENSORS_AD7418 is not set @@ -2532,7 +2521,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LM63 is not set # CONFIG_SENSORS_LM70 is not set # CONFIG_SENSORS_LM73 is not set -# CONFIG_SENSORS_LM75 is not set +CONFIG_SENSORS_LM75=y # CONFIG_SENSORS_LM77 is not set # CONFIG_SENSORS_LM78 is not set # CONFIG_SENSORS_LM80 is not set @@ -2643,6 +2632,7 @@ CONFIG_BCMA_POSSIBLE=y # CONFIG_LPC_SCH is not set # CONFIG_LPC_CHIP3 is not set # CONFIG_SUNWAY_SUPERIO_AST2400 is not set +# 
CONFIG_MFD_INTEL_PMT is not set # CONFIG_MFD_IQS62X is not set # CONFIG_MFD_JANZ_CMODIO is not set # CONFIG_MFD_KEMPLD is not set @@ -2938,9 +2928,11 @@ CONFIG_HDMI=y # # CONFIG_VGA_CONSOLE is not set # CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is not set CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y # CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set @@ -3073,6 +3065,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_DEFAULT_PERSIST=y # CONFIG_USB_FEW_INIT_RETRIES is not set # CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set CONFIG_USB_AUTOSUSPEND_DELAY=2 @@ -3507,8 +3500,9 @@ CONFIG_SUNWAY_IOMMU=y # # IRQ chip support # -CONFIG_SW64_INTC=y CONFIG_IRQCHIP=y +CONFIG_SW64_INTC_V2=y +CONFIG_SW64_LPC_INTC=y # CONFIG_AL_FIC is not set # end of IRQ chip support @@ -3537,7 +3531,7 @@ CONFIG_IRQCHIP=y # # end of Performance monitor support -# CONFIG_RAS is not set +CONFIG_RAS=y # CONFIG_USB4 is not set # @@ -3546,6 +3540,11 @@ CONFIG_IRQCHIP=y # CONFIG_ANDROID is not set # end of Android +# +# Vendor Hooks +# +# end of Vendor Hooks + # CONFIG_LIBNVDIMM is not set # CONFIG_DAX is not set CONFIG_NVMEM=y @@ -3657,6 +3656,7 @@ CONFIG_FAT_DEFAULT_UTF8=y CONFIG_NTFS_FS=y # CONFIG_NTFS_DEBUG is not set CONFIG_NTFS_RW=y +# CONFIG_NTFS3_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # @@ -3675,6 +3675,7 @@ CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y +# CONFIG_ENHANCED_HUGETLB_MMAP is not set CONFIG_MEMFD_CREATE=y CONFIG_CONFIGFS_FS=y CONFIG_EFIVAR_FS=m @@ -3935,7 +3936,7 @@ CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_SHA3 is not set -# CONFIG_CRYPTO_SM3 is not set +# 
CONFIG_CRYPTO_SM3_GENERIC is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_WP512 is not set @@ -3954,7 +3955,7 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_SALSA20 is not set # CONFIG_CRYPTO_CHACHA20 is not set # CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_SM4 is not set +# CONFIG_CRYPTO_SM4_GENERIC is not set # CONFIG_CRYPTO_TWOFISH is not set # @@ -4074,6 +4075,7 @@ CONFIG_HAS_DMA=y CONFIG_DMA_OPS=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_DMA_DECLARE_COHERENT=y +CONFIG_SWIOTLB=y # CONFIG_DMA_API_DEBUG is not set CONFIG_SGL_ALLOC=y CONFIG_CPU_RMAP=y @@ -4107,9 +4109,11 @@ CONFIG_SBITMAP=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=7 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y +CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options # @@ -4126,7 +4130,7 @@ CONFIG_FRAME_WARN=2048 CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B is not set CONFIG_ARCH_WANT_FRAME_POINTERS=y -CONFIG_FRAME_POINTER=y +# CONFIG_FRAME_POINTER is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options @@ -4208,6 +4212,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set +CONFIG_HAVE_DEBUG_BUGVERBOSE=y # # Debug kernel data structures @@ -4235,7 +4240,6 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=21 # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -# CONFIG_LATENCYTOP is not set CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y @@ -4254,7 +4258,7 @@ CONFIG_EARLY_PRINTK=y CONFIG_MATHEMU=y CONFIG_STACKTRACE_SUPPORT=y # CONFIG_SW64_RRU is not set -# CONFIG_SW64_RRK is not set +CONFIG_SW64_RRK=y # end of sw_64 Debugging # @@ -4306,7 
+4310,9 @@ CONFIG_RUNTIME_TESTING_MENU=y # CONFIG_TEST_FREE_PAGES is not set # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage -# end of Kernel hacking -CONFIG_KABI_SIZE_ALIGN_CHECKS=y -CONFIG_KABI_RESERVE=y +# +# Rust hacking +# +# end of Rust hacking +# end of Kernel hacking -- Gitee From 6769f44c20bea54e59a54403192c8f6f620d4b46 Mon Sep 17 00:00:00 2001 From: Hang Xiaoqian Date: Sun, 9 Oct 2022 17:01:25 +0800 Subject: [PATCH 68/77] sw64: fix compile errors when CONFIG_STACKTRACE is not set Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56QAM -------------------------------- The stacktrace.c should always be compiled. Signed-off-by: Hang Xiaoqian Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 02facabae2d9..850f6dfddd1f 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -17,10 +17,9 @@ obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \ systbls.o dup_print.o tc.o timer.o \ insn.o early_init.o topology.o cacheinfo.o \ - vdso.o vdso/ hmcall.o + vdso.o vdso/ hmcall.o stacktrace.o obj-$(CONFIG_ACPI) += acpi.o -obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-sysfs.o obj-$(CONFIG_MODULES) += module.o -- Gitee From 8721bed0acab651a7a36a1abe2a61ff73af2ade6 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 10 Oct 2022 14:26:25 +0800 Subject: [PATCH 69/77] sw64: adjust layout of clear_user.S Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTLH -------------------------------- Adjust layout of clear_user.S to make sure we can get the correct symbol name when tracing.
Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/lib/clear_user.S | 64 ++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/sw_64/lib/clear_user.S b/arch/sw_64/lib/clear_user.S index 88d332032c9d..5ac77fc8ca0d 100644 --- a/arch/sw_64/lib/clear_user.S +++ b/arch/sw_64/lib/clear_user.S @@ -27,6 +27,38 @@ .ent __clear_user .frame $30, 0, $26 .prologue 0 +__clear_user: + and $17, $17, $0 + and $16, 7, $4 + beq $0, $zerolength + addl $0, $4, $1 + and $1, 7, $2 + srl $1, 3, $1 + beq $4, $loop + + subl $4, 8, $4 + addl $0, $4, $0 + beq $1, $oneword + +$head: + EX(stb $31, 0($16)) + addl $16, 1, $16 + addl $4, 1, $4 + bne $4, $head + subl $1, 1, $1 + br $loop + unop + +$oneword: + EX(stb $31, 0($16)) + addl $16, 1, $16 + addl $4, 1, $4 + bne $4, $oneword + clr $0 + +$zerolength: +$exception: + ret $31, ($26), 1 $loop: and $1, 3, $4 @@ -66,37 +98,5 @@ $tail: clr $0 ret $31, ($26), 1 -__clear_user: - and $17, $17, $0 - and $16, 7, $4 - beq $0, $zerolength - addl $0, $4, $1 - and $1, 7, $2 - srl $1, 3, $1 - beq $4, $loop - - subl $4, 8, $4 - addl $0, $4, $0 - beq $1, $oneword - -$head: - EX(stb $31, 0($16)) - addl $16, 1, $16 - addl $4, 1, $4 - bne $4, $head - subl $1, 1, $1 - br $loop - unop - -$oneword: - EX(stb $31, 0($16)) - addl $16, 1, $16 - addl $4, 1, $4 - bne $4, $oneword - clr $0 - -$zerolength: -$exception: - ret $31, ($26), 1 .end __clear_user EXPORT_SYMBOL(__clear_user) -- Gitee From 7c80a502160782902217506b5c940844fa1e282c Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 10 Oct 2022 14:26:25 +0800 Subject: [PATCH 70/77] sw64: add deep-set-template.S Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTLH -------------------------------- Add deep-set-template.S to rewrite memset() and optimize __clear_user(). 
Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/lib/Kconfig | 7 ++ arch/sw_64/lib/Makefile | 6 +- arch/sw_64/lib/deep-clear_user.S | 48 +++++++++++ arch/sw_64/lib/deep-memset.S | 94 ++------------------ arch/sw_64/lib/deep-set_template.S | 133 +++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 91 deletions(-) create mode 100644 arch/sw_64/lib/deep-clear_user.S create mode 100644 arch/sw_64/lib/deep-set_template.S diff --git a/arch/sw_64/lib/Kconfig b/arch/sw_64/lib/Kconfig index d1e9cdd3947a..e22751a457ce 100644 --- a/arch/sw_64/lib/Kconfig +++ b/arch/sw_64/lib/Kconfig @@ -8,6 +8,13 @@ config DEEP_CLEAR_PAGE This option enables the use of SIMD version of clear page routine. Say N if you want to use the generic version. +config DEEP_CLEAR_USER + bool "Clear User with SIMD optimization" + default y + help + This option enables the use of SIMD version of clear user routine. + Say N if you want to use the generic version. + config DEEP_COPY_PAGE bool "Copy Page with SIMD optimization" default y diff --git a/arch/sw_64/lib/Makefile b/arch/sw_64/lib/Makefile index bb2e9b52fedc..e4727dce3655 100644 --- a/arch/sw_64/lib/Makefile +++ b/arch/sw_64/lib/Makefile @@ -11,7 +11,6 @@ lib-y = __divlu.o __remlu.o __divwu.o __remwu.o \ memmove.o \ checksum.o \ csum_partial_copy.o \ - clear_user.o \ fpreg.o \ strcpy.o \ strncpy.o \ @@ -21,6 +20,9 @@ lib-y = __divlu.o __remlu.o __divwu.o __remwu.o \ lib-clear_page-y := clear_page.o lib-clear_page-$(CONFIG_DEEP_CLEAR_PAGE) := deep-clear_page.o +lib-clear_user-y := clear_user.o +lib-clear_user-$(CONFIG_DEEP_CLEAR_USER) := deep-clear_user.o + lib-copy_page-y := copy_page.o lib-copy_page-$(CONFIG_DEEP_COPY_PAGE) := deep-copy_page.o @@ -33,7 +35,7 @@ lib-memcpy-$(CONFIG_DEEP_MEMCPY) := deep-memcpy.o lib-memset-y := memset.o lib-memset-$(CONFIG_DEEP_MEMSET) := deep-memset.o -lib-y += $(lib-clear_page-y) $(lib-copy_page-y) $(lib-copy_user-y) $(lib-memcpy-y) $(lib-memset-y) +lib-y += 
$(lib-clear_page-y) $(lib-clear_user-y) $(lib-copy_page-y) $(lib-copy_user-y) $(lib-memcpy-y) $(lib-memset-y) obj-y = iomap.o obj-y += iomap_copy.o diff --git a/arch/sw_64/lib/deep-clear_user.S b/arch/sw_64/lib/deep-clear_user.S new file mode 100644 index 000000000000..521586a7189f --- /dev/null +++ b/arch/sw_64/lib/deep-clear_user.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Contributed by Mao Minkai + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + * + */ +#include +/* Allow an exception for an insn; exit if we get one. */ +#define FIXUP_LDST(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + ldi $31, $out-99b($31); \ + .previous + +/* + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop + * $18: bytes left to copy + * + */ + .globl __clear_user + .ent __clear_user +__clear_user: + .prologue 0 + bis $31, $31, $7 + mov $17, $18 + bis $31, $31, $17 +#include "deep-set_template.S" +$out: + bis $31, $18, $0 + beq $7, $return + +$restore_simd: + RESTORE_SIMD_REGS + +$return: + ret + .end __clear_user + EXPORT_SYMBOL(__clear_user) diff --git a/arch/sw_64/lib/deep-memset.S b/arch/sw_64/lib/deep-memset.S index 7fbd529c72a8..5d9beb1e2f53 100644 --- a/arch/sw_64/lib/deep-memset.S +++ b/arch/sw_64/lib/deep-memset.S @@ -27,7 +27,8 @@ #include -#define NC_STORE_THRESHOLD 2048 +#define FIXUP_LDST(x, y) \ + x, y .set noat .set noreorder @@ -53,94 +54,9 @@ ___memset: bis $17, $4, $17 __constant_c_memset: - bis $31, $16, $0 # set return value - beq $18, $out # return if size is 0 - cmplt $18, 8, $5 # size less than 8, do 1-byte loop - bne $5, $tail_loop - -/* loop until SRC is 8 bytes aligned */ - .align 5 -$head_loop: - and $16, 0x7, $1 - beq $1, $mod8_aligned - stb 
$17, 0($16) - subl $18, 1, $18 - beq $18, $out - addl $16, 1, $16 - br $31, $head_loop - -$mod8_aligned: - -/* set 8 bytes each time */ - .align 5 -$mod8_loop: - and $16, 0x1f, $1 - beq $1, $mod32_aligned - subl $18, 8, $18 - blt $18, $tail - stl $17, 0($16) - addl $16, 8, $16 - br $31, $mod8_loop - -/* expand data to 32 bytes */ -$mod32_aligned: - subl $sp, 64, $sp - addl $sp, 31, $4 - bic $4, 0x1f, $4 - vstd $f10, 0($4) - ifmovd $17, $f10 - vcpyf $f10, $f10 - - ldi $1, NC_STORE_THRESHOLD($31) - cmple $18, $1, $1 - bne $1, $mod32_loop - -/* set 64 bytes each time */ - .align 5 -$mod32_loop_nc: - subl $18, 64, $18 - blt $18, $mod32_tail_memb - vstd_nc $f10, 0($16) - vstd_nc $f10, 32($16) - addl $16, 64, $16 - br $31, $mod32_loop_nc - - .align 5 -$mod32_loop: - subl $18, 64, $18 - blt $18, $mod32_tail - vstd $f10, 0($16) - vstd $f10, 32($16) - addl $16, 64, $16 - br $31, $mod32_loop - -$mod32_tail_memb: - memb # required for _nc store instructions -$mod32_tail: - vldd $f10, 0($4) - addl $sp, 64, $sp - addl $18, 64, $18 - .align 5 -$mod32_tail_loop: - subl $18, 8, $18 - blt $18, $tail - stl $17, 0($16) - addl $16, 8, $16 - br $31, $mod32_tail_loop - -$tail: - addl $18, 8, $18 - -/* set one byte each time */ - .align 5 -$tail_loop: - beq $18, $out - stb $17, 0($16) - subl $18, 1, $18 - addl $16, 1, $16 - br $31, $tail_loop - -/* done, return */ + bis $31, $31, $7 + bis $31, $16, $0 +#include "deep-set_template.S" $out: ret diff --git a/arch/sw_64/lib/deep-set_template.S b/arch/sw_64/lib/deep-set_template.S new file mode 100644 index 000000000000..f9073d638468 --- /dev/null +++ b/arch/sw_64/lib/deep-set_template.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * template for memcpy and copy_user with SIMD + * + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop + * $16: latest dest, clobbered + * $17: 8-byte data to set + * $18: bytes left to copy + * + */ + +#define NC_STORE_THRESHOLD 2048 + +#define SAVE_SIMD_REGS \ 
+ ldi $sp, -0x40($sp); \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vstd $f1, 0($23); \ + ldi $7, 1 + +#define RESTORE_SIMD_REGS \ + vldd $f1, 0($23); \ + ldi $sp, 0x40($sp); \ + bis $31, $31, $7 + + ble $18, $out + and $16, 7, $1 + beq $1, $dest_aligned_8 + + .align 3 +$byte_loop_head: + FIXUP_LDST( stb $17, 0($16) ) + subl $18, 1, $18 + addl $16, 1, $16 + ble $18, $out + and $16, 7, $1 + bne $1, $byte_loop_head + +$dest_aligned_8: + cmplt $18, 16, $1 + bne $1, $quad_loop_end + and $16, 31, $1 + beq $1, $dest_aligned_32 + cmplt $18, 64, $1 + bne $1, $simd_end + + .align 3 +$quad_loop_head: + FIXUP_LDST( stl $17, 0($16) ) + addl $16, 8, $16 + subl $18, 8, $18 + and $16, 31, $1 + beq $1, $dest_aligned_32 + br $31, $quad_loop_head + +$dest_aligned_32: + cmplt $18, 64, $1 + bne $1, $simd_end + +$prep_simd_loop: + SAVE_SIMD_REGS + ifmovd $17, $f1 + vcpyf $f1, $f1 + ldi $1, NC_STORE_THRESHOLD($31) + cmple $18, $1, $1 + bne $1, $simd_loop + + .align 3 +$simd_loop_nc: + FIXUP_LDST( vstd_nc $f1, 0($16) ) + FIXUP_LDST( vstd_nc $f1, 32($16) ) + subl $18, 64, $18 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_loop_nc + memb # required for _nc store instructions + br $31, $simd_loop_end + + .align 3 +$simd_loop: + FIXUP_LDST( vstd $f1, 0($16) ) + FIXUP_LDST( vstd $f1, 32($16) ) + subl $18, 64, $18 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_loop + +$simd_loop_end: + cmplt $18, 32, $1 + bne $1, $no_more_simd + FIXUP_LDST( vstd $f1, 0($16) ) + subl $18, 32, $18 + addl $16, 32, $16 + +$no_more_simd: + RESTORE_SIMD_REGS + +$simd_end: + ble $18, $out + cmplt $18, 16, $1 + bne $1, $quad_loop_end + + .align 3 +$quad_loop_tail: + FIXUP_LDST( stl $17, 0($16) ) + FIXUP_LDST( stl $17, 8($16) ) + subl $18, 16, $18 + addl $16, 16, $16 + cmplt $18, 16, $1 + beq $1, $quad_loop_tail + +$quad_loop_end: + ble $18, $out + cmplt $18, 8, $1 + bne $1, $byte_loop_tail + +$move_one_quad: + FIXUP_LDST( stl $17, 0($16) ) + subl $18, 8, $18 + addl $16, 8, $16 + ble $18, $out 
+ + .align 3 +$byte_loop_tail: + FIXUP_LDST( stb $17, 0($16) ) + subl $18, 1, $18 + addl $16, 1, $16 + bgt $18, $byte_loop_tail + br $31, $out -- Gitee From 604b7e008400dbb6ca0c8b23442aba4406121966 Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 10 Oct 2022 14:26:25 +0800 Subject: [PATCH 71/77] sw64: fix incorrect gp after kretprobe triggered Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTM4 -------------------------------- SW64 use r26 to calculate gp after function return, so r26 needs to be restored when kretprobe trampoline is hit. Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/kprobes/kprobes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sw_64/kernel/kprobes/kprobes.c b/arch/sw_64/kernel/kprobes/kprobes.c index 59f040eaa3e1..7080c892a24d 100644 --- a/arch/sw_64/kernel/kprobes/kprobes.c +++ b/arch/sw_64/kernel/kprobes/kprobes.c @@ -284,6 +284,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, orig_ret_address = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); instruction_pointer(regs) = orig_ret_address; + regs->r26 = orig_ret_address; /* * By returning a non-zero value, we are telling -- Gitee From 92e21f82d17b7bd890a3d8f98adbbe4f1381ec88 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Thu, 8 Sep 2022 14:39:55 +0800 Subject: [PATCH 72/77] sw64: rename ASN to ASID Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- ASID is a more common name than ASN. It also renames some related macros. 
Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 4 +- arch/sw_64/include/asm/hw_init.h | 2 +- arch/sw_64/include/asm/mmu_context.h | 66 +++++++++++----------------- arch/sw_64/include/asm/tlbflush.h | 10 ++--- arch/sw_64/kernel/setup.c | 2 +- 5 files changed, 35 insertions(+), 49 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 71d203efc587..5255d91e41a6 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -17,7 +17,7 @@ #define HMC_wrksp 0x0E #define HMC_mtinten 0x0F #define HMC_load_mm 0x11 -#define HMC_tbisasn 0x14 +#define HMC_tbisasid 0x14 #define HMC_tbivpn 0x19 #define HMC_ret 0x1A #define HMC_wrvpcr 0x29 @@ -167,7 +167,7 @@ __CALL_HMC_R0(whami, unsigned long); __CALL_HMC_RW1(rdio64, unsigned long, unsigned long); __CALL_HMC_RW1(rdio32, unsigned int, unsigned long); __CALL_HMC_W2(wrent, void*, unsigned long); -__CALL_HMC_W2(tbisasn, unsigned long, unsigned long); +__CALL_HMC_W2(tbisasid, unsigned long, unsigned long); __CALL_HMC_W1(wrkgp, unsigned long); __CALL_HMC_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); __CALL_HMC_RW3(sendii, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index 81dd2581e0da..1fd7ed18c3f0 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -18,7 +18,7 @@ struct cache_desc { }; struct cpuinfo_sw64 { - unsigned long last_asn; + unsigned long last_asid; unsigned long ipi_count; struct cache_desc icache; /* Primary I-cache */ struct cache_desc dcache; /* Primary D or combined I/D cache */ diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index 84e84048a3ba..452da240ce99 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -2,11 +2,6 @@ #ifndef _ASM_SW64_MMU_CONTEXT_H #define 
_ASM_SW64_MMU_CONTEXT_H -/* - * get a new mmu context.. - * - * Copyright (C) 1996, Linus Torvalds - */ #include #include @@ -17,53 +12,44 @@ * table pointer(CSR:PTBR) or when we update the ASID. * */ -#define load_asn_ptbr load_mm +#define load_asid_ptbr load_mm /* - * The maximum ASN's the processor supports. ASN is called ASID too. + * The maximum ASID's the processor supports. */ #ifdef CONFIG_SUBARCH_C3B -#define WIDTH_HARDWARE_ASN 10 +#define ASID_BITS 10 #endif -/* - * cpu_last_asn(processor): - * 63 0 - * +-------------+----------------+--------------+ - * | asn version | this processor | hardware asn | - * +-------------+----------------+--------------+ - */ - #include -#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) +#define last_asid(cpu) (cpu_data[cpu].last_asid) -#define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) -#define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) +#define ASID_FIRST_VERSION (1UL << ASID_BITS) +#define ASID_MASK ((1UL << ASID_BITS) - 1) /* - * NOTE! The way this is set up, the high bits of the "asn_cache" (and - * the "mm->context") are the ASN _version_ code. A version of 0 is - * always considered invalid, so to invalidate another process you only - * need to do "p->mm->context = 0". + * NOTE! The way this is set up, the high bits of the "last_asid" (and + * the "mm->context.asid[cpu]") are the ASID _version_ code. A version + * of 0 is always considered invalid, so to invalidate another process + * you only need to do "p->mm->context.asid[cpu] = 0". * - * If we need more ASN's than the processor has, we invalidate the old - * user TLB's (tbivp()) and start a new ASN version. That will automatically - * force a new asn for any other processes the next time they want to - * run. + * If we need more ASID's than the processor has, we invalidate the old + * user TLB's (tbivp()) and start a new ASID version. That will force a + * new asid for any other processes the next time they want to run. 
*/ static inline unsigned long __get_new_mm_context(struct mm_struct *mm, long cpu) { - unsigned long asn = cpu_last_asn(cpu); - unsigned long next = asn + 1; + unsigned long asid = last_asid(cpu); + unsigned long next = asid + 1; - if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK) { + if ((asid & ASID_MASK) >= ASID_MASK) { tbivp(); - next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; + next = (asid & ~ASID_MASK) + ASID_FIRST_VERSION; } - cpu_last_asn(cpu) = next; + last_asid(cpu) = next; return next; } @@ -71,26 +57,26 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev_mm, struct mm_struct *next_mm, struct task_struct *next) { - /* Check if our ASN is of an older version, and thus invalid. */ - unsigned long asn, mmc, ptbr; + /* Check if our ASID is of an older version, and thus invalid. */ + unsigned long asid, mmc, ptbr; long cpu = smp_processor_id(); - asn = cpu_last_asn(cpu); + asid = last_asid(cpu); mmc = next_mm->context.asid[cpu]; - if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { - /* Check if mmc and cpu asn is in the same version */ + if ((mmc ^ asid) & ~ASID_MASK) { + /* Check if mmc and cpu asid is in the same version */ mmc = __get_new_mm_context(next_mm, cpu); next_mm->context.asid[cpu] = mmc; } /* * Update CSR:UPN and CSR:PTBR. Another thread may have allocated - * a new mm->context[asid] (via flush_tlb_mm) without the ASN serial + * a new mm->context[asid] (via flush_tlb_mm) without the ASID serial * number wrapping. We have no way to detect when this is needed. 
 */ - asn = mmc & HARDWARE_ASN_MASK; + asid = mmc & ASID_MASK; ptbr = virt_to_pfn(next_mm->pgd); - load_asn_ptbr(asn, ptbr); + load_asid_ptbr(asid, ptbr); } #define switch_mm_irqs_off switch_mm_irqs_off diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index b35af83e6ec2..f92a93cfe3db 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -12,7 +12,7 @@ static inline void flush_tlb_current(struct mm_struct *mm) { - unsigned long mmc, asn, ptbr, flags; + unsigned long mmc, asid, ptbr, flags; local_irq_save(flags); @@ -20,12 +20,12 @@ static inline void flush_tlb_current(struct mm_struct *mm) mm->context.asid[smp_processor_id()] = mmc; /* - * Force a new ASN for a task. Note that there is no way to - * write UPN only now, so call load_asn_ptbr here. + * Force a new ASID for a task. Note that there is no way to + * write UPN only now, so call load_asid_ptbr here. */ - asn = mmc & HARDWARE_ASN_MASK; + asid = mmc & ASID_MASK; ptbr = virt_to_pfn(mm->pgd); - load_asn_ptbr(asn, ptbr); + load_asid_ptbr(asid, ptbr); local_irq_restore(flags); } diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index f68d93b5a7b7..d4c97741616f 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL(screen_info); */ void store_cpu_data(int cpu) { - cpu_data[cpu].last_asn = ASN_FIRST_VERSION; + cpu_data[cpu].last_asid = ASID_FIRST_VERSION; } #ifdef CONFIG_KEXEC -- Gitee From 6c0ddc05e1e69570d93f741c0da2dce85eebc762 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Thu, 8 Sep 2022 15:55:46 +0800 Subject: [PATCH 73/77] sw64: rewrite tlb flushing interfaces Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- This patch borrows some LoongArch code, i.e. 
it rewrites following interfaces: flush_tlb_all(), flush_tlb_mm(), flush_tlb_page(), flush_tlb_range() and flush_tlb_kernel_range(), then remove flush_tlb() which can be achieved by flush_tlb_mm() according to Documentation/core-api/cachetlb.rst. To support new implementation, it fixes hmcall tbisasid to invalidate TLB of addr with specified ASID and current VPN, and adds hmcall wrasid to force update ASID. Besides, this patch adds helper cpu_asid() and asid_valid(), then simplify __get_new_mm_context() and its callers. That makes code cleaner. Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 8 ++ arch/sw_64/include/asm/mmu_context.h | 42 ++++----- arch/sw_64/include/asm/tlbflush.h | 133 ++++++++++----------------- arch/sw_64/kernel/hmcall.c | 42 ++++++++- arch/sw_64/kernel/smp.c | 108 ++++++++++------------ 5 files changed, 165 insertions(+), 168 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 5255d91e41a6..22de7d9f41a3 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -12,6 +12,7 @@ #define HMC_cpuid 0x03 #define HMC_sleepen 0x05 #define HMC_rdksp 0x06 +#define HMC_wrasid 0x08 #define HMC_rdptbr 0x0B #define HMC_wrptbr 0x0C #define HMC_wrksp 0x0E @@ -157,8 +158,15 @@ __CALL_HMC_W1(wrusp, unsigned long); __CALL_HMC_R0(rdksp, unsigned long); __CALL_HMC_W1(wrksp, unsigned long); +/* + * Load a mm context. This is needed when we change the page + * table pointer(CSR:PTBR) or when we update the ASID. 
+ * load_mm(asid, ptbr) + * + */ __CALL_HMC_W2(load_mm, unsigned long, unsigned long); +__CALL_HMC_W1(wrasid, unsigned long); __CALL_HMC_R0(rdptbr, unsigned long); __CALL_HMC_W1(wrptbr, unsigned long); diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index 452da240ce99..5ae9d4616937 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -7,13 +7,6 @@ #include #include -/* - * Load a mm context. This is needed when we change the page - * table pointer(CSR:PTBR) or when we update the ASID. - * - */ -#define load_asid_ptbr load_mm - /* * The maximum ASID's the processor supports. */ @@ -28,6 +21,13 @@ #define ASID_FIRST_VERSION (1UL << ASID_BITS) #define ASID_MASK ((1UL << ASID_BITS) - 1) +#define cpu_asid(cpu, mm) ((mm)->context.asid[cpu] & ASID_MASK) + +static inline bool asid_valid(struct mm_struct *mm, unsigned int cpu) +{ + return !((mm->context.asid[cpu] ^ last_asid(cpu)) & ~ASID_MASK); +} + /* * NOTE! The way this is set up, the high bits of the "last_asid" (and * the "mm->context.asid[cpu]") are the ASID _version_ code. A version @@ -39,18 +39,14 @@ * new asid for any other processes the next time they want to run. */ -static inline unsigned long -__get_new_mm_context(struct mm_struct *mm, long cpu) +static inline void __get_new_mm_context(struct mm_struct *mm, long cpu) { unsigned long asid = last_asid(cpu); - unsigned long next = asid + 1; - if ((asid & ASID_MASK) >= ASID_MASK) { + if (!(++asid & ASID_MASK)) tbivp(); - next = (asid & ~ASID_MASK) + ASID_FIRST_VERSION; - } - last_asid(cpu) = next; - return next; + mm->context.asid[cpu] = last_asid(cpu) = asid; + } static inline void @@ -58,25 +54,21 @@ switch_mm_irqs_off(struct mm_struct *prev_mm, struct mm_struct *next_mm, struct task_struct *next) { /* Check if our ASID is of an older version, and thus invalid. 
*/ - unsigned long asid, mmc, ptbr; + unsigned long asid, ptbr; long cpu = smp_processor_id(); - asid = last_asid(cpu); - mmc = next_mm->context.asid[cpu]; - if ((mmc ^ asid) & ~ASID_MASK) { - /* Check if mmc and cpu asid is in the same version */ - mmc = __get_new_mm_context(next_mm, cpu); - next_mm->context.asid[cpu] = mmc; - } + if (!asid_valid(next_mm, cpu)) + __get_new_mm_context(next_mm, cpu); /* * Update CSR:UPN and CSR:PTBR. Another thread may have allocated * a new mm->context[asid] (via flush_tlb_mm) without the ASID serial * number wrapping. We have no way to detect when this is needed. */ - asid = mmc & ASID_MASK; + asid = cpu_asid(cpu, next_mm); ptbr = virt_to_pfn(next_mm->pgd); - load_asid_ptbr(asid, ptbr); + load_mm(asid, ptbr); + cpumask_set_cpu(cpu, mm_cpumask(next_mm)); } #define switch_mm_irqs_off switch_mm_irqs_off diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index f92a93cfe3db..53c384932eb9 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -10,121 +10,84 @@ #include #include -static inline void flush_tlb_current(struct mm_struct *mm) +static inline void local_flush_tlb_all(void) { - unsigned long mmc, asid, ptbr, flags; + tbiv(); +} - local_irq_save(flags); +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu; + unsigned long flags; - mmc = __get_new_mm_context(mm, smp_processor_id()); - mm->context.asid[smp_processor_id()] = mmc; + local_irq_save(flags); - /* - * Force a new ASID for a task. Note that there is no way to - * write UPN only now, so call load_asid_ptbr here. 
- */ - asid = mmc & ASID_MASK; - ptbr = virt_to_pfn(mm->pgd); - load_asid_ptbr(asid, ptbr); + cpu = smp_processor_id(); + if (!asid_valid(mm, cpu)) { + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + goto out; + } + if (current->mm == mm) { + __get_new_mm_context(mm, cpu); + wrasid(cpu_asid(cpu, mm)); + } else { + mm->context.asid[cpu] = 0; + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } +out: local_irq_restore(flags); } -/* - * Flush just one page in the current TLB set. We need to be very - * careful about the icache here, there is no way to invalidate a - * specific icache page. - */ - -static inline void flush_tlb_current_page(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_flags & VM_EXEC) - tbis(addr); - else - tbisd(addr); -} - - -/* Flush current user mapping. */ -static inline void flush_tlb(void) +static inline void +local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - flush_tlb_current(current->active_mm); -} + int cpu; + struct mm_struct *mm; -/* Flush someone else's user mapping. */ -static inline void flush_tlb_other(struct mm_struct *mm) -{ - unsigned long *mmc; + cpu = smp_processor_id(); + mm = vma->vm_mm; - if (mm) { - mmc = &mm->context.asid[smp_processor_id()]; - /* - * Check it's not zero first to avoid cacheline ping pong - * when possible. - */ - if (*mmc) - *mmc = 0; - } + if (asid_valid(mm, cpu)) + tbisasid(cpu_asid(cpu, mm), addr); + else + cpumask_clear_cpu(cpu, mm_cpumask(mm)); } -#ifndef CONFIG_SMP /* - * Flush everything (kernel mapping may also have changed - * due to vmalloc/vfree). + * It flushes the whole user tlb now. */ -static inline void flush_tlb_all(void) -{ - tbiv(); -} - -/* Flush a specified user mapping. */ static inline void -flush_tlb_mm(struct mm_struct *mm) +local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { - if (mm == current->mm) - flush_tlb_current(mm); - else - flush_tlb_other(mm); -} - -/* Page-granular tlb flush. 
 */ -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - struct mm_struct *mm = vma->vm_mm; - - if (mm == current->mm) - flush_tlb_current_page(mm, vma, addr); - else - flush_tlb_other(mm); + local_flush_tlb_mm(vma->vm_mm); } /* - * Flush a specified range of user mapping. On the sw64 we flush - * the whole user tlb. + * There is no way to invalidate kernel pages only, so it has to + * invalidate all mappings. */ -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void +local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_mm(vma->vm_mm); + local_flush_tlb_all(); } -#else /* CONFIG_SMP */ - +#ifdef CONFIG_SMP extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *); extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_range(struct vm_area_struct *, unsigned long, unsigned long); +extern void flush_tlb_kernel_range(unsigned long, unsigned long); +#else +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) +#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end) +#define flush_tlb_kernel_range(start, end) local_flush_tlb_kernel_range(start, end) #endif /* CONFIG_SMP */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - #endif /* _ASM_SW64_TLBFLUSH_H */ diff --git a/arch/sw_64/kernel/hmcall.c b/arch/sw_64/kernel/hmcall.c index b81d7fff1c34..3d60569a4f6f 100644 --- a/arch/sw_64/kernel/hmcall.c +++ b/arch/sw_64/kernel/hmcall.c @@ -38,11 +38,51 @@ static inline void fixup_wrtp(void) entry[1] = 0x1ee00000; /* pri_ret $23 */ } +static inline void fixup_tbiasid(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(tbisasid)); + + entry[0] = 0x18fffe47; /* pri_rcsr p7, CSR__DTB_PCR*/ + entry[1] = 
0x4a05c905; /* sll r16, CSR__DTB_PCR__UPN__S, p5 */ + entry[2] = 0xf89f03ff; /* ldi p4, CSR__DTB_PCR__UPN__M */ + entry[3] = 0x4885c904; /* sll p4, CSR__DTB_PCR__UPN__S, p4 */ + entry[4] = 0x40e40724; /* bic p7, p4, p4 */ + entry[5] = 0x40850745; /* bis p4, p5, p5 */ + entry[6] = 0x18bfff47; /* pri_wcsr p5, CSR__DTB_PCR */ + entry[7] = 0x1a3fff46; /* pri_wcsr r17, CSR__DTB_IS */ + entry[8] = 0x18ffff47; /* pri_wcsr p7, CSR__DTB_PCR */ + entry[9] = 0x4a04e906; /* sll r16, CSR__UPCR_UPN__UPN__S, p6 */ + entry[10] = 0x189ffe22; /* pri_rcsr p4, CSR__UPCR_UPN */ + entry[11] = 0x18dfff22; /* pri_wcsr p6, CSR__UPCR_UPN */ + entry[12] = 0x1a3fff06; /* pri_wcsr r17, CSR__ITB_IS */ + entry[13] = 0x1bffff15; /* pri_wcsr r31, CSR__IC_FLUSH */ + entry[14] = 0x189fff22; /* pri_wcsr p4, CSR__UPCR_UPN */ + entry[15] = 0x1ef00000; /* pri_ret/b p23 */ +} + +static inline void fixup_wrasid(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(wrasid)); + + entry[0] = 0x18fffe47; /* pri_rcsr p7, CSR__DTB_PCR*/ + entry[1] = 0x4a05c905; /* sll r16, CSR__DTB_PCR__UPN__S, p5 */ + entry[2] = 0xf89f03ff; /* ldi p4, CSR__DTB_PCR__UPN__M */ + entry[3] = 0x4885c904; /* sll p4, CSR__DTB_PCR__UPN__S, p4 */ + entry[4] = 0x40e40724; /* bic p7, p4, p4 */ + entry[5] = 0x40850745; /* bis p4, p5, p5 */ + entry[6] = 0x18bfff47; /* pri_wcsr p5, CSR__DTB_PCR */ + entry[7] = 0x4a04e906; /* sll r16, CSR__UPCR_UPN__UPN__S, p6 */ + entry[8] = 0x18dfff22; /* pri_wcsr p4, CSR__UPCR_UPN */ + entry[9] = 0x1ef00000; /* pri_ret/b p23 */ +} + void __init fixup_hmcall(void) { -#if defined(CONFIG_SUBARCH_C3A) || defined(CONFIG_SUBARCH_C3B) +#if defined(CONFIG_SUBARCH_C3B) fixup_rdtp(); fixup_wrtp(); + fixup_tbiasid(); + fixup_wrasid(); #endif } diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index b467562bce9e..ecf276e9e364 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -478,7 +478,7 @@ void native_send_call_func_single_ipi(int cpu) static void ipi_flush_tlb_all(void *ignored) { - 
tbiv(); + local_flush_tlb_all(); } void flush_tlb_all(void) @@ -491,108 +491,102 @@ void flush_tlb_all(void) static void ipi_flush_tlb_mm(void *x) { - struct mm_struct *mm = (struct mm_struct *) x; - - if (mm == current->mm) - flush_tlb_current(mm); - else - flush_tlb_other(mm); + local_flush_tlb_mm((struct mm_struct *)x); } void flush_tlb_mm(struct mm_struct *mm) { - preempt_disable(); /* happens as a result of exit_mmap() * Shall we clear mm->context.asid[] here? */ if (atomic_read(&mm->mm_users) == 0) { - preempt_enable(); return; } - if (mm == current->mm) { - flush_tlb_current(mm); - if (atomic_read(&mm->mm_users) == 1) { - int cpu, this_cpu = smp_processor_id(); + preempt_disable(); - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu) || cpu == this_cpu) - continue; - if (mm->context.asid[cpu]) - mm->context.asid[cpu] = 0; - } - preempt_enable(); - return; - } - } else - flush_tlb_other(mm); + if (atomic_read(&mm->mm_users) != 1 || mm != current->mm) { + on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); + } else { + int cpu, this_cpu = smp_processor_id(); - smp_call_function(ipi_flush_tlb_mm, mm, 1); + for_each_online_cpu(cpu) { + if (cpu != this_cpu && mm->context.asid[cpu]) + mm->context.asid[cpu] = 0; + } + local_flush_tlb_mm(mm); + } preempt_enable(); } EXPORT_SYMBOL(flush_tlb_mm); -struct flush_tlb_page_struct { +struct flush_tlb_info { struct vm_area_struct *vma; - struct mm_struct *mm; unsigned long addr; +#define start addr + unsigned long end; }; static void ipi_flush_tlb_page(void *x) { - struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; - struct mm_struct *mm = data->mm; - - if (mm == current->mm) - flush_tlb_current_page(mm, data->vma, data->addr); - else - flush_tlb_other(mm); + struct flush_tlb_info *info = x; + local_flush_tlb_page(info->vma, info->addr); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - struct flush_tlb_page_struct data; struct mm_struct *mm = vma->vm_mm; 
preempt_disable(); - if (mm == current->mm) { - flush_tlb_current_page(mm, vma, addr); - if (atomic_read(&mm->mm_users) == 1) { - int cpu, this_cpu = smp_processor_id(); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu) || cpu == this_cpu) - continue; - if (mm->context.asid[cpu]) - mm->context.asid[cpu] = 0; - } - preempt_enable(); - return; + if (atomic_read(&mm->mm_users) != 1 || mm != current->mm) { + struct flush_tlb_info info = { + .vma = vma, + .addr = addr, + }; + on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_page, &info, 1); + } else { + int cpu, this_cpu = smp_processor_id(); + + for_each_online_cpu(cpu) { + if (cpu != this_cpu && mm->context.asid[cpu]) + mm->context.asid[cpu] = 0; } - } else - flush_tlb_other(mm); - - data.vma = vma; - data.mm = mm; - data.addr = addr; - - smp_call_function(ipi_flush_tlb_page, &data, 1); + local_flush_tlb_page(vma, addr); + } preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page); +/* It always flush the whole user tlb by now. To be optimized. */ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - /* On the SW we always flush the whole user tlb. 
*/ flush_tlb_mm(vma->vm_mm); } EXPORT_SYMBOL(flush_tlb_range); +static void ipi_flush_tlb_kernel_range(void *x) +{ + struct flush_tlb_info *info = x; + + local_flush_tlb_kernel_range(info->start, info->end); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct flush_tlb_info info = { + .start = start, + .end = end, + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &info, 1); +} +EXPORT_SYMBOL(flush_tlb_kernel_range); + int native_cpu_disable(void) { int cpu = smp_processor_id(); -- Gitee From 1a1339853c20d1c6b28cd5582c0a95866f86da5e Mon Sep 17 00:00:00 2001 From: Mao Minkai Date: Mon, 10 Oct 2022 15:57:11 +0800 Subject: [PATCH 74/77] sw64: fix incorrect gp after uretprobe triggered Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- SW64 use r26 to calculate gp after function return, so r26 needs to be restored when uretprobe trampoline is hit. Since uretprobe is handled in generic code, we will modify r26 before we return to user space. 
Signed-off-by: Mao Minkai Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/uprobes.h | 2 ++ arch/sw_64/kernel/traps.c | 6 +++-- arch/sw_64/kernel/uprobes.c | 38 ++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/include/asm/uprobes.h b/arch/sw_64/include/asm/uprobes.h index 97b67af25bce..2a5b268cb88f 100644 --- a/arch/sw_64/include/asm/uprobes.h +++ b/arch/sw_64/include/asm/uprobes.h @@ -35,4 +35,6 @@ struct arch_uprobe_task { unsigned long saved_trap_nr; }; +extern void sw64_fix_uretprobe(struct pt_regs *regs); + #endif /* _ASM_SW64_UPROBES_H */ diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 5fac85c29bf6..9362fcd922c5 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -267,12 +267,14 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) case BREAK_KPROBE_SS: if (notify_die(DIE_SSTEPBP, "single_step", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; +#ifdef CONFIG_UPROBES case UPROBE_BRK_UPROBE: if (notify_die(DIE_UPROBE, "uprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) - return; + return sw64_fix_uretprobe(regs); case UPROBE_BRK_UPROBE_XOL: if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) - return; + return sw64_fix_uretprobe(regs); +#endif } if (user_mode(regs)) diff --git a/arch/sw_64/kernel/uprobes.c b/arch/sw_64/kernel/uprobes.c index 786f2e38a59f..1160ca1e836a 100644 --- a/arch/sw_64/kernel/uprobes.c +++ b/arch/sw_64/kernel/uprobes.c @@ -151,3 +151,41 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { return 0; } + +/* + * struct xol_area and get_trampoline_vaddr() are copied from + * kernel/events/uprobes.c to avoid modifying arch-independent + * code. 
+ */ +struct xol_area { + wait_queue_head_t wq; + atomic_t slot_count; + unsigned long *bitmap; + struct vm_special_mapping xol_mapping; + struct page *pages[2]; + unsigned long vaddr; +}; + +static unsigned long get_trampoline_vaddr(void) +{ + struct xol_area *area; + unsigned long trampoline_vaddr = -1; + + area = READ_ONCE(current->mm->uprobes_state.xol_area); + if (area) + trampoline_vaddr = area->vaddr; + + return trampoline_vaddr; +} + +void sw64_fix_uretprobe(struct pt_regs *regs) +{ + unsigned long bp_vaddr; + + bp_vaddr = uprobe_get_swbp_addr(regs); + /* + * regs->pc has been changed to orig_ret_vaddr in handle_trampoline(). + */ + if (bp_vaddr == get_trampoline_vaddr()) + regs->r26 = regs->pc; +} -- Gitee From e6fb952e453e802c7b0a4f188dac8e605c0af5df Mon Sep 17 00:00:00 2001 From: He Sheng Date: Thu, 13 Oct 2022 10:59:27 +0800 Subject: [PATCH 75/77] sw64: handle kprobe breakpoint if CONFIG_KPROBES=y Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG -------------------------------- If CONFIG_KPROBES is not set, kprobe breakpoint instructions are treated as illegal instructions. 
Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 9362fcd922c5..b26a0e369ed9 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -261,12 +261,14 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) case IF_OPDEC: switch (inst) { +#ifdef CONFIG_KPROBES case BREAK_KPROBE: if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; case BREAK_KPROBE_SS: if (notify_die(DIE_SSTEPBP, "single_step", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; +#endif #ifdef CONFIG_UPROBES case UPROBE_BRK_UPROBE: if (notify_die(DIE_UPROBE, "uprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) -- Gitee From e9df0c725625dc097a5766b259bf354ee01f7bd1 Mon Sep 17 00:00:00 2001 From: He Sheng Date: Thu, 13 Oct 2022 11:01:10 +0800 Subject: [PATCH 76/77] sw64: fix compile error for CONFIG_RRU=y Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56QAM -------------------------------- Signed-off-by: He Sheng Reviewed-by: Cui Wei Signed-off-by: Gu Zitao --- arch/sw_64/kernel/dup_print.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c index 02639f40a4bc..3a32c444207d 100644 --- a/arch/sw_64/kernel/dup_print.c +++ b/arch/sw_64/kernel/dup_print.c @@ -52,6 +52,8 @@ int sw64_printk(const char *fmt, va_list args) #endif #ifdef CONFIG_SW64_RRU +#include + static DEFINE_SPINLOCK(printf_lock); #define USER_PRINT_BUFF_BASE (0x600000UL + __START_KERNEL_map) #define USER_PRINT_BUFF_LEN 0x100000UL -- Gitee From 8c501acc55e0a0f17270fae6ba74af2d0e2a99df Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Mon, 17 Oct 2022 09:16:19 +0800 Subject: [PATCH 77/77] sw64: remove single step setting in uprobe Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XTMN 
-------------------------------- There is no point in calling user_{enable,disable}_single_step() since uprobe breakpoints are implemented by illegal instructions on sw64. The arch-specific implementation of these two functions have been removed before, and it's time to remove the calling now. Signed-off-by: He Chuyue Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/uprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/sw_64/kernel/uprobes.c b/arch/sw_64/kernel/uprobes.c index 1160ca1e836a..e25793f4a058 100644 --- a/arch/sw_64/kernel/uprobes.c +++ b/arch/sw_64/kernel/uprobes.c @@ -4,8 +4,6 @@ #include #include -#define UPROBE_TRAP_NR ULONG_MAX - /** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @mm: the probed address space. @@ -54,8 +52,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) /* Instruction points to execute ol */ instruction_pointer_set(regs, utask->xol_vaddr); - user_enable_single_step(current); - return 0; } @@ -66,8 +62,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs) /* Instruction points to execute next to breakpoint address */ instruction_pointer_set(regs, utask->vaddr + 4); - user_disable_single_step(current); - return 0; } -- Gitee