diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index cf2f6f00708c64669537b78371b9af7450a2a67b..36cddefb1ad4bcdb734b0bbfe14adee8f0f6f92d 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -14,7 +14,6 @@ config SW64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT select ARCH_USE_CMPXCHG_LOCKREF select GENERIC_SMP_IDLE_THREAD @@ -24,7 +23,6 @@ config SW64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER - select OLD_SIGACTION select OLD_SIGSUSPEND select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER @@ -92,11 +90,14 @@ config SW64 select ACPI_REDUCED_HARDWARE_ONLY select GENERIC_TIME_VSYSCALL select SET_FS + select HAVE_PCI + select GENERIC_PCI_IOMAP if PCI select PCI_MSI_ARCH_FALLBACKS select DMA_OPS if PCI select HAVE_REGS_AND_STACK_ACCESS_API select ARCH_HAS_PTE_SPECIAL select HARDIRQS_SW_RESEND + select MEMORY_HOTPLUG_SPARSE if MEMORY_HOTPLUG config LOCKDEP_SUPPORT def_bool y @@ -141,6 +142,10 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_GPIO bool +config GENERIC_CALIBRATE_DELAY + bool + default y + config ZONE_DMA32 bool default y @@ -240,6 +245,11 @@ config PLATFORM_XUELANG endchoice +config MIGHT_HAVE_PC_SERIO + bool "Use PC serio device i8042" + select ARCH_MIGHT_HAVE_PC_SERIO + default n + endmenu config LOCK_MEMB @@ -509,17 +519,6 @@ config ISA_DMA_API bool default y -config PCI - bool "PCI Support" - depends on SW64 - select GENERIC_PCI_IOMAP - default y - help - Find out whether you have a PCI motherboard. PCI is the name of a - bus system, i.e. the way the CPU talks to the other stuff inside - your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or - VESA. If you have PCI, say Y, otherwise N. 
- config PCI_DOMAINS def_bool PCI @@ -724,7 +723,6 @@ config HZ int "HZ of the short timer" default 500 -source "drivers/pci/Kconfig" source "drivers/eisa/Kconfig" source "drivers/pcmcia/Kconfig" diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 84ca7ffcb2ef528d114c39b50fde947561dedc07..105389d5989fe7d2f81ad5da3e229cf122f1885e 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -393,7 +393,6 @@ static void chip3_set_rc_piu(unsigned long node, unsigned long index) /* set DMA offset value PCITODMA_OFFSET */ write_piu_ior0(node, index, EPDMABAR, PCITODMA_OFFSET); if (IS_ENABLED(CONFIG_PCI_MSI)) { - write_piu_ior0(node, index, PIUCONFIG0, 0x38076); write_piu_ior0(node, index, MSIADDR, MSIX_MSG_ADDR); for (i = 0; i < 256; i++) write_piu_ior0(node, index, MSICONFIG0 + (i << 7), 0); @@ -656,8 +655,8 @@ static void handle_dev_int(struct pt_regs *regs) sw64_io_write(node, DEV_INT_CONFIG, config_val); } -void handle_chip_irq(unsigned long type, unsigned long vector, - unsigned long irq_arg, struct pt_regs *regs) +asmlinkage void do_entInt(unsigned long type, unsigned long vector, + unsigned long irq_arg, struct pt_regs *regs) { struct pt_regs *old_regs; @@ -738,6 +737,7 @@ void handle_chip_irq(unsigned long type, unsigned long vector, } pr_crit("PC = %016lx PS = %04lx\n", regs->pc, regs->ps); } +EXPORT_SYMBOL(do_entInt); /* * Early fix up the chip3 Root Complex settings diff --git a/arch/sw_64/include/asm/cacheflush.h b/arch/sw_64/include/asm/cacheflush.h index 985161896f71bb1edeb050b932e6551aff62c879..536b0b7b78bdbb7269c2e772818043fee937bbf4 100644 --- a/arch/sw_64/include/asm/cacheflush.h +++ b/arch/sw_64/include/asm/cacheflush.h @@ -2,94 +2,12 @@ #ifndef _ASM_SW64_CACHEFLUSH_H #define _ASM_SW64_CACHEFLUSH_H -#include -#include - -/* Caches aren't brain-dead on the sw64. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define flush_dcache_page(page) do { } while (0) -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) -#define flush_cache_vmap(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) do { } while (0) - -/* Note that the following two definitions are _highly_ dependent - * on the contexts in which they are used in the kernel. I personally - * think it is criminal how loosely defined these macros are. +/* + * DCache: PIPT + * ICache: + * - C3A/B is VIVT with ICTAG, support coherence. + * - C4 is VIPT */ - -/* We need to flush the kernel's icache after loading modules. The - * only other use of this macro is in load_aout_interp which is not - * used on sw64. - - * Note that this definition should *not* be used for userspace - * icache flushing. While functional, it is _way_ overkill. The - * icache is tagged with ASNs and it suffices to allocate a new ASN - * for the process. 
- */ -#ifndef CONFIG_SMP -static inline void -flush_icache_range(unsigned long start, unsigned long end) -{ - if (icache_is_vivt_no_ictag()) - imb(); -} -#define flush_icache_range flush_icache_range -#else -extern void smp_imb(void); -static inline void -flush_icache_range(unsigned long start, unsigned long end) -{ - if (icache_is_vivt_no_ictag()) - smp_imb(); -} -#define flush_icache_range flush_icache_range -#endif - -/* We need to flush the userspace icache after setting breakpoints in - * ptrace. - - * Instead of indiscriminately using imb, take advantage of the fact - * that icache entries are tagged with the ASN and load a new mm context. - */ -/* ??? Ought to use this in arch/sw_64/kernel/signal.c too. */ - -#ifndef CONFIG_SMP -#include - -extern void __load_new_mm_context(struct mm_struct *); -static inline void -flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - if ((vma->vm_flags & VM_EXEC) && icache_is_vivt_no_ictag()) - imb(); -} -#define flush_icache_user_page flush_icache_user_page -#else -extern void flush_icache_user_page(struct vm_area_struct *vma, - struct page *page, - unsigned long addr, int len); -#define flush_icache_user_page flush_icache_user_page -#endif - -/* This is used only in __do_fault and do_swap_page. */ -#define flush_icache_page(vma, page) \ - flush_icache_user_page((vma), (page), 0, 0) - -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ -do { \ - memcpy(dst, src, len); \ - flush_icache_user_page(vma, page, vaddr, len); \ -} while (0) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - memcpy(dst, src, len) - #include #endif /* _ASM_SW64_CACHEFLUSH_H */ diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h index 06ad4bcd6ad3f2599a15a8acfc18dd08eb3685e8..88714eb08507c6bdfd39326c6939e5610eb3421d 100644 --- a/arch/sw_64/include/asm/clock.h +++ b/arch/sw_64/include/asm/clock.h @@ -44,13 +44,13 @@ struct clk { int clk_init(void); -int sw64_set_rate(int index, unsigned long rate); +void sw64_set_rate(unsigned long rate); struct clk *sw64_clk_get(struct device *dev, const char *id); -unsigned long sw64_clk_get_rate(struct clk *clk); - void sw64_update_clockevents(unsigned long cpu, u32 freq); void sw64_store_policy(struct cpufreq_policy *policy); + +unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy); #endif /* _ASM_SW64_CLOCK_H */ diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 310cc61a5a343b4e531fe98ae01b9eb3e5aba7f4..e85397ab06a1de2cee870f51b3506af577211f00 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -17,16 +17,12 @@ #define HMC_wrksp 0x0E #define HMC_mtinten 0x0F #define HMC_load_mm 0x11 -#define HMC_rdpcbb 0x12 -#define HMC_wrpcbb 0x13 #define HMC_tbisasn 0x14 #define HMC_tbivpn 0x19 #define HMC_ret 0x1A #define HMC_wrvpcr 0x29 #define HMC_wrfen 0x2B -#define HMC_kvcpucb 0x2C #define HMC_sflush 0x2F -#define HMC_swpctx 0x30 #define HMC_entervm 0x31 #define HMC_hcall 0x32 #define HMC_tbi 0x33 @@ -45,23 +41,27 @@ /* 0x80 - 0xBF : User Level HMC routine */ -#define HMC_bpt 0x80 -#define HMC_callsys 0x83 -#define HMC_imb 0x86 +#include + +/* Following will be deprecated from user level invocation */ #define HMC_rwreg 0x87 -#define HMC_rdunique 0x9E -#define HMC_wrunique 0x9F #define HMC_sz_uflush 0xA8 -#define HMC_gentrap 0xAA -#define HMC_wrperfmon 0xB0 #define HMC_longtime 0xB1 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include +extern void __init fixup_hmcall(void); + 
extern void halt(void) __attribute__((noreturn)); #define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) +#define fpu_enable() \ +{ \ + __asm__ __volatile__("sys_call %0" : : "i" (HMC_wrfen));\ +} + #define imb() \ __asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory") @@ -156,8 +156,6 @@ __CALL_HMC_R0(rdksp, unsigned long); __CALL_HMC_W1(wrksp, unsigned long); __CALL_HMC_W2(load_mm, unsigned long, unsigned long); -__CALL_HMC_R0(rdpcbb, unsigned long); -__CALL_HMC_W1(wrpcbb, unsigned long); __CALL_HMC_R0(rdptbr, unsigned long); __CALL_HMC_W1(wrptbr, unsigned long); @@ -166,7 +164,6 @@ __CALL_HMC_RW1(swpipl, unsigned long, unsigned long); __CALL_HMC_R0(whami, unsigned long); __CALL_HMC_RW1(rdio64, unsigned long, unsigned long); __CALL_HMC_RW1(rdio32, unsigned int, unsigned long); -__CALL_HMC_R0(kvcpucb, unsigned long); __CALL_HMC_R0(sleepen, unsigned long); __CALL_HMC_R0(mtinten, unsigned long); __CALL_HMC_W2(wrent, void*, unsigned long); @@ -178,6 +175,7 @@ __CALL_HMC_W1(wrtimer, unsigned long); __CALL_HMC_RW3(tbivpn, unsigned long, unsigned long, unsigned long, unsigned long); __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); +__CALL_HMC_W1(wrtp, unsigned long); /* * TB routines.. */ @@ -193,12 +191,28 @@ __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); }) #define tbi(x, y) __tbi(x, __r17 = (y), "1" (__r17)) -#define tbisi(x) __tbi(1, __r17 = (x), "1" (__r17)) -#define tbisd(x) __tbi(2, __r17 = (x), "1" (__r17)) -#define tbis(x) __tbi(3, __r17 = (x), "1" (__r17)) -#define tbiap() __tbi(-1, /* no second argument */) + +/* Invalidate all TLB, only used by hypervisor */ #define tbia() __tbi(-2, /* no second argument */) +/* Invalidate TLB for all processes with currnet VPN */ +#define tbivp() __tbi(-1, /* no second argument */) + +/* Invalidate all TLB with current VPN */ +#define tbiv() __tbi(0, /* no second argument */) + +/* Invalidate ITLB of addr with current UPN and VPN */ +#define tbisi(addr) __tbi(1, __r17 = (addr), "1" (__r17)) + +/* Invalidate DTLB of addr with current UPN and VPN */ +#define tbisd(addr) __tbi(2, __r17 = (addr), "1" (__r17)) + +/* Invalidate TLB of addr with current UPN and VPN */ +#define tbis(addr) __tbi(3, __r17 = (addr), "1" (__r17)) + +/* Invalidate all user TLB with current UPN and VPN */ +#define tbiu() __tbi(4, /* no second argument */) + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h index f60a58570a9219c5d796d0383843b6c62d1eb93b..8a28aac2e54f05d83de77f69e030b7976d871a86 100644 --- a/arch/sw_64/include/asm/hw_init.h +++ b/arch/sw_64/include/asm/hw_init.h @@ -18,16 +18,8 @@ struct cache_desc { }; struct cpuinfo_sw64 { - unsigned long loops_per_jiffy; unsigned long last_asn; - int need_new_asn; - int asn_lock; unsigned long ipi_count; - unsigned long prof_multiplier; - unsigned long prof_counter; - unsigned char mcheck_expected; - unsigned char mcheck_taken; - unsigned char mcheck_extra; struct cache_desc icache; /* Primary I-cache */ struct cache_desc dcache; /* Primary D or combined I/D cache */ struct cache_desc scache; /* Secondary cache */ @@ -45,7 +37,6 @@ struct cpu_desc_t { char vendor_id[16]; char model_id[64]; unsigned long frequency; - __u8 run_mode; } __randomize_layout; #define MAX_NUMSOCKETS 8 @@ -74,6 +65,8 @@ struct memmap_entry { }; extern struct cpuinfo_sw64 cpu_data[NR_CPUS]; +extern void store_cpu_data(int cpu); + extern struct cpu_desc_t cpu_desc; extern struct socket_desc_t 
socket_desc[MAX_NUMSOCKETS]; extern int memmap_nr; @@ -89,12 +82,11 @@ static inline unsigned long get_cpu_freq(void) return cpu_desc.frequency; } -static inline bool icache_is_vivt_no_ictag(void) +static inline void update_cpu_freq(unsigned long freq) { - /* - * Icache of C3B is vivt with ICtag. C4 will be vipt. - */ - return (cpu_desc.arch_var == 0x3 && cpu_desc.arch_rev == 0x1); + freq = freq * 1000000; + if (cpu_desc.frequency != freq) + cpu_desc.frequency = freq; } #define EMUL_FLAG (0x1UL << 63) diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h index b568efef699487405884b82789c61f69b216371c..48dbc486a126d6b37cf10897ef2d9518e6302a75 100644 --- a/arch/sw_64/include/asm/irq_impl.h +++ b/arch/sw_64/include/asm/irq_impl.h @@ -41,10 +41,8 @@ enum sw64_irq_type { extern struct irqaction timer_irqaction; extern void init_rtc_irq(irq_handler_t handler); extern void handle_irq(int irq); -extern void handle_ipi(struct pt_regs *); +extern void handle_ipi(struct pt_regs *regs); extern void __init sw64_init_irq(void); extern irqreturn_t timer_interrupt(int irq, void *dev); -extern void handle_chip_irq(unsigned long type, unsigned long vector, - unsigned long irq_arg, struct pt_regs *regs); #endif diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index d6cd01d5571211908bf5424f7f01665c4542d1a0..84e84048a3ba6527941b1b1271c1fdcafdddda3a 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -13,38 +13,14 @@ #include /* - * Force a context reload. This is needed when we change the page - * table pointer or when we update the ASN of the current process. + * Load a mm context. This is needed when we change the page + * table pointer(CSR:PTBR) or when we update the ASID. + * */ - -static inline unsigned long -__reload_thread(struct pcb_struct *pcb) -{ - register unsigned long a0 __asm__("$16"); - register unsigned long v0 __asm__("$0"); - - a0 = virt_to_phys(pcb); - __asm__ __volatile__( - "sys_call %2 #__reload_thread" - : "=r"(v0), "=r"(a0) - : "i"(HMC_swpctx), "r"(a0) - : "$1", "$22", "$23", "$24", "$25"); - - return v0; -} - #define load_asn_ptbr load_mm /* - * The maximum ASN's the processor supports. - * - * If a processor implements address space numbers (ASNs), and the old - * PTE has the Address Space Match (ASM) bit clear (ASNs in use) and - * the Valid bit set, then entries can also effectively be made coherent - * by assigning a new, unused ASN to the currently running process and - * not reusing the previous ASN before calling the appropriate HMcode - * routine to invalidate the translation buffer (TB). - * + * The maximum ASN's the processor supports. ASN is called ASID too. */ #ifdef CONFIG_SUBARCH_C3B @@ -60,12 +36,7 @@ __reload_thread(struct pcb_struct *pcb) */ #include -#ifdef CONFIG_SMP #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) -#else -extern unsigned long last_asn; -#define cpu_last_asn(cpuid) last_asn -#endif /* CONFIG_SMP */ #define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) #define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) @@ -77,7 +48,7 @@ extern unsigned long last_asn; * need to do "p->mm->context = 0". * * If we need more ASN's than the processor has, we invalidate the old - * user TLB's (tbiap()) and start a new ASN version. That will automatically + * user TLB's (tbivp()) and start a new ASN version. That will automatically * force a new asn for any other processes the next time they want to * run. 
*/ @@ -89,7 +60,7 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) unsigned long next = asn + 1; if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK) { - tbiap(); + tbivp(); next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; } cpu_last_asn(cpu) = next; @@ -97,18 +68,13 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) } static inline void -switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, - struct task_struct *next) +switch_mm_irqs_off(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) { /* Check if our ASN is of an older version, and thus invalid. */ - unsigned long asn; - unsigned long mmc; + unsigned long asn, mmc, ptbr; long cpu = smp_processor_id(); -#ifdef CONFIG_SMP - cpu_data[cpu].asn_lock = 1; - barrier(); -#endif asn = cpu_last_asn(cpu); mmc = next_mm->context.asid[cpu]; if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { @@ -116,50 +82,31 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, mmc = __get_new_mm_context(next_mm, cpu); next_mm->context.asid[cpu] = mmc; } -#ifdef CONFIG_SMP - else - cpu_data[cpu].need_new_asn = 1; -#endif /* - * Always update the PCB ASN. Another thread may have allocated - * a new mm->context (via flush_tlb_mm) without the ASN serial + * Update CSR:UPN and CSR:PTBR. Another thread may have allocated + * a new mm->context[asid] (via flush_tlb_mm) without the ASN serial * number wrapping. We have no way to detect when this is needed. */ - task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; - /* - * Always update the PCB PTBR. If next is kernel thread, it must - * update PTBR. If next is user process, it's ok to update PTBR. - */ - task_thread_info(next)->pcb.ptbr = virt_to_pfn(next_mm->pgd); - load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr); + asn = mmc & HARDWARE_ASN_MASK; + ptbr = virt_to_pfn(next_mm->pgd); + load_asn_ptbr(asn, ptbr); } -extern void __load_new_mm_context(struct mm_struct *); - -#ifdef CONFIG_SMP -#define check_mmu_context() \ -do { \ - int cpu = smp_processor_id(); \ - cpu_data[cpu].asn_lock = 0; \ - barrier(); \ - if (cpu_data[cpu].need_new_asn) { \ - struct mm_struct *mm = current->active_mm; \ - cpu_data[cpu].need_new_asn = 0; \ - if (!mm->context.asid[cpu]) \ - __load_new_mm_context(mm); \ - } \ -} while (0) -#else -#define check_mmu_context() do { } while (0) -#endif +#define switch_mm_irqs_off switch_mm_irqs_off -static inline void activate_mm(struct mm_struct *prev_mm, - struct mm_struct *next_mm) +static inline void +switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *tsk) { - __load_new_mm_context(next_mm); + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev_mm, next_mm, tsk); + local_irq_restore(flags); } +#define activate_mm(prev, next) switch_mm(prev, next, current) #define deactivate_mm(tsk, mm) do { } while (0) static inline int init_new_context(struct task_struct *tsk, @@ -169,8 +116,6 @@ static inline int init_new_context(struct task_struct *tsk, for_each_possible_cpu(i) mm->context.asid[i] = 0; - if (tsk != current) - task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); return 0; } @@ -182,7 +127,6 @@ static inline void destroy_context(struct mm_struct *mm) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { - task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); } static inline int arch_dup_mmap(struct mm_struct *oldmm, diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index 
ac99430156639b8e96fe2e0307dcd90cb10a2df7..b5afebf82939c0dc94d0b984d53dcb616c2b6a92 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -40,12 +40,7 @@ struct pt_regs { unsigned long r26; unsigned long r27; unsigned long r28; - unsigned long hae; -/* JRP - These are the values provided to a0-a2 by HMcode */ - unsigned long trap_a0; - unsigned long trap_a1; - unsigned long trap_a2; -/* These are saved by HMcode: */ + /* These are saved by HMcode: */ unsigned long ps; unsigned long pc; unsigned long gp; @@ -54,7 +49,6 @@ struct pt_regs { unsigned long r18; }; -#define arch_has_single_step() (1) #define user_mode(regs) (((regs)->ps & 8) != 0) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/sw_64/include/asm/signal.h b/arch/sw_64/include/asm/signal.h index 3e91b72c0b0a8af9c0803cc8d0257a1f917e4f77..0d846c1aa571e5ea946d7ef9422ba63d77d3bd30 100644 --- a/arch/sw_64/include/asm/signal.h +++ b/arch/sw_64/include/asm/signal.h @@ -14,9 +14,11 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#ifdef CONFIG_OLD_SIGACTION -#define __ARCH_HAS_SA_RESTORER -#endif +struct odd_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + int sa_flags; +}; #include #endif diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h index 83fd413fd6e29c13b594e3945934fca76b6b6240..de6d97a0aff6d88956a90dc6bb57cb27c3dbaf26 100644 --- a/arch/sw_64/include/asm/suspend.h +++ b/arch/sw_64/include/asm/suspend.h @@ -39,7 +39,7 @@ struct processor_state { struct callee_saved_fpregs fpregs; unsigned long fpcr; #ifdef CONFIG_HIBERNATION - struct pcb_struct pcb; + unsigned long sp; struct vcpucb vcb; #endif }; diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h index d503fc59390f51d0ba34d21785f0ac1811b453a8..e5596a735b2dbb4ef3fc98b86953db3c2666cd28 100644 --- a/arch/sw_64/include/asm/switch_to.h +++ b/arch/sw_64/include/asm/switch_to.h @@ -6,27 +6,39 @@ extern void __fpstate_save(struct task_struct *save_to); extern void __fpstate_restore(struct task_struct *restore_from); -extern struct task_struct *__switch_to(unsigned long pcb, - struct task_struct *prev, struct task_struct *next); +extern struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); extern void restore_da_match_after_sched(void); -static inline void fpstate_save(struct task_struct *task) +static inline void aux_save(struct task_struct *task) { - if (likely(!(task->flags & PF_KTHREAD))) + struct pcb_struct *pcb; + + if (likely(!(task->flags & PF_KTHREAD))) { + pcb = &task_thread_info(task)->pcb; + pcb->usp = rdusp(); + pcb->tp = rtid(); __fpstate_save(task); + } } -static inline void fpstate_restore(struct task_struct *task) +static inline void aux_restore(struct task_struct *task) { - if (likely(!(task->flags & PF_KTHREAD))) + struct pcb_struct *pcb; + + if (likely(!(task->flags & PF_KTHREAD))) { + pcb = &task_thread_info(task)->pcb; + wrusp(pcb->usp); + wrtp(pcb->tp); __fpstate_restore(task); + } } static inline void __switch_to_aux(struct task_struct *prev, struct task_struct *next) { - fpstate_save(prev); - fpstate_restore(next); + aux_save(prev); + aux_restore(next); } @@ -34,10 +46,8 @@ static inline void __switch_to_aux(struct task_struct *prev, do { \ struct task_struct *__prev = (prev); \ struct task_struct *__next = (next); \ - __u64 __nextpcb = virt_to_phys(&task_thread_info(__next)->pcb); \ __switch_to_aux(__prev, __next); \ - (last) = 
__switch_to(__nextpcb, __prev, __next); \ - check_mmu_context(); \ + (last) = __switch_to(__prev, __next); \ } while (0) diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h index 33b95f815448456b0bdd8723f102dc826285ab96..31740003d0b2d31204dc6ea36a14a7b9c8284595 100644 --- a/arch/sw_64/include/asm/thread_info.h +++ b/arch/sw_64/include/asm/thread_info.h @@ -15,13 +15,8 @@ typedef struct { struct pcb_struct { - unsigned long ksp; unsigned long usp; - unsigned long ptbr; - unsigned int pcc; - unsigned int asn; - unsigned long unique; - unsigned long flags; + unsigned long tp; unsigned long da_match, da_mask; unsigned long dv_match, dv_mask; unsigned long dc_ctl; @@ -39,14 +34,19 @@ struct thread_info { int preempt_count; /* 0 => preemptible, <0 => BUG */ unsigned int status; /* thread-synchronous flags */ - int bpt_nsaved; - unsigned long bpt_addr[2]; /* breakpoint handling */ - unsigned int bpt_insn[2]; #ifdef CONFIG_DYNAMIC_FTRACE unsigned long dyn_ftrace_addr; #endif }; +static __always_inline u64 rtid(void) +{ + u64 val; + + asm volatile("rtid %0" : "=r" (val) : :); + return val; +} + /* * Macros/functions for gaining access to the thread information structure. */ diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index 7805bb28725792fe480c98bdf31a645df573d200..b35af83e6ec271f27819b2f4a5c29ea13d16d0bf 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -8,13 +8,26 @@ #include #include #include - -extern void __load_new_mm_context(struct mm_struct *); - +#include static inline void flush_tlb_current(struct mm_struct *mm) { - __load_new_mm_context(mm); + unsigned long mmc, asn, ptbr, flags; + + local_irq_save(flags); + + mmc = __get_new_mm_context(mm, smp_processor_id()); + mm->context.asid[smp_processor_id()] = mmc; + + /* + * Force a new ASN for a task. Note that there is no way to + * write UPN only now, so call load_asn_ptbr here. + */ + asn = mmc & HARDWARE_ASN_MASK; + ptbr = virt_to_pfn(mm->pgd); + load_asn_ptbr(asn, ptbr); + + local_irq_restore(flags); } /* @@ -27,12 +40,10 @@ static inline void flush_tlb_current_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { - if (vma->vm_flags & VM_EXEC) { - tbi(3, addr); - if (icache_is_vivt_no_ictag()) - imb(); - } else - tbi(2, addr); + if (vma->vm_flags & VM_EXEC) + tbis(addr); + else + tbisd(addr); } @@ -65,7 +76,7 @@ static inline void flush_tlb_other(struct mm_struct *mm) */ static inline void flush_tlb_all(void) { - tbia(); + tbiv(); } /* Flush a specified user mapping. 
*/ diff --git a/arch/sw_64/include/asm/uaccess.h b/arch/sw_64/include/asm/uaccess.h index ceacfaa07cfb2f76ba3602a209d33ad7083c613c..730121aad1840459b94d01737bfb50848c878246 100644 --- a/arch/sw_64/include/asm/uaccess.h +++ b/arch/sw_64/include/asm/uaccess.h @@ -292,6 +292,8 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long len) { return __copy_user((__force void *)to, from, len); } +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER extern long __clear_user(void __user *to, long len); diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index 5b3fe80aed1b47e457690694a8cb2c5be3bac57d..476c396c5aa403de009b7de90e5fba9cbf516c03 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -32,7 +32,7 @@ struct vcpucb { unsigned long vcpu_irq_disabled; unsigned long vcpu_irq; unsigned long ptbr; - unsigned long int_stat0; + unsigned long tid; unsigned long int_stat1; unsigned long int_stat2; unsigned long int_stat3; diff --git a/arch/sw_64/include/uapi/asm/hmcall.h b/arch/sw_64/include/uapi/asm/hmcall.h index f10378ba99c8042db62524c013ba64a44a5c2d48..dcff778e16163e0cdac3ae23b056dd8ccd263899 100644 --- a/arch/sw_64/include/uapi/asm/hmcall.h +++ b/arch/sw_64/include/uapi/asm/hmcall.h @@ -7,8 +7,10 @@ #define HMC_bpt 0x80 #define HMC_callsys 0x83 #define HMC_imb 0x86 -#define HMC_rdunique 0x9E -#define HMC_wrunique 0x9F +#define HMC_rdtp 0x9E +#define HMC_wrtp 0x9F +#define HMC_rdunique HMC_rdtp +#define HMC_wrunique HMC_wrtp #define HMC_gentrap 0xAA #define HMC_wrperfmon 0xB0 diff --git a/arch/sw_64/include/uapi/asm/kvm.h b/arch/sw_64/include/uapi/asm/kvm.h index ff1b6e7f096f77405394fa307a4f64b304598e33..126c2a1d74110ef6d8eb12b2314e2193c2d5e823 100644 --- a/arch/sw_64/include/uapi/asm/kvm.h +++ b/arch/sw_64/include/uapi/asm/kvm.h @@ -5,7 +5,7 @@ /* * KVM SW specific structures and definitions. */ -#define SWVM_IRQS 64 +#define SWVM_IRQS 256 enum SW64_KVM_IRQ { SW64_KVM_IRQ_IPI = 27, SW64_KVM_IRQ_TIMER = 9, diff --git a/arch/sw_64/include/uapi/asm/ptrace.h b/arch/sw_64/include/uapi/asm/ptrace.h index 80bad067fc15523e92a61f1cfed0a159e7803677..5cf3ca1d3dd843b0dc18b7c7452e9b7220d9359b 100644 --- a/arch/sw_64/include/uapi/asm/ptrace.h +++ b/arch/sw_64/include/uapi/asm/ptrace.h @@ -36,7 +36,8 @@ struct user_fpsimd_state { #define FPREG_END 62 #define FPCR 63 #define PC 64 -#define UNIQUE 65 +#define TP 65 +#define UNIQUE TP #define VECREG_BASE 67 #define VECREG_END 161 #define F31_V1 98 diff --git a/arch/sw_64/include/uapi/asm/sigcontext.h b/arch/sw_64/include/uapi/asm/sigcontext.h index facbf34e920d4f57b2ba51069e760920f8cde730..11d7eece86efa8e50c6ce5b7fb71e36341aad88a 100644 --- a/arch/sw_64/include/uapi/asm/sigcontext.h +++ b/arch/sw_64/include/uapi/asm/sigcontext.h @@ -2,15 +2,13 @@ #ifndef _UAPI_ASM_SW64_SIGCONTEXT_H #define _UAPI_ASM_SW64_SIGCONTEXT_H +/* + * Signal context structure + * + * The context is saved before a signal handler is invoked, and it is + * restored by sys_sigreturn / sys_rt_sigreturn. + */ struct sigcontext { - /* - * What should we have here? I'd probably better use the same - * stack layout as DEC Unix, just in case we ever want to try - * running their binaries.. - * - * This is the basic layout, but I don't know if we'll ever - * actually fill in all the values.. 
- */ long sc_onstack; long sc_mask; long sc_pc; @@ -19,6 +17,7 @@ struct sigcontext { long sc_ownedfp; long sc_fpregs[128]; /* SIMD-FP */ unsigned long sc_fpcr; + /* TODO: Following are unused, to be removed and synced with libc */ unsigned long sc_fp_control; unsigned long sc_reserved1, sc_reserved2; unsigned long sc_ssize; diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index d4dc9e175d67d21a64b15781e5bf2c09a5df64c3..02facabae2d9f736d121b1f8b628748af501b484 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -17,7 +17,7 @@ obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \ systbls.o dup_print.o tc.o timer.o \ insn.o early_init.o topology.o cacheinfo.o \ - vdso.o vdso/ + vdso.o vdso/ hmcall.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -31,9 +31,13 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_DEBUG_FS) += segvdbg.o bindvcpu.o +obj-$(CONFIG_DEBUG_FS) += segvdbg.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o +ifeq ($(CONFIG_DEBUG_FS)$(CONFIG_NUMA),yy) +obj-y += bindvcpu.o +endif + ifndef CONFIG_PCI obj-y += pci-noop.o endif diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 9e6c338a5edd8bc0ebef67b2ebc1ca3fdbdfc996..12b3311c1bcb75f25a825de2bf0d74083b5051c8 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -33,9 +33,8 @@ void foo(void) OFFSET(PSTATE_FPREGS, processor_state, fpregs); OFFSET(PSTATE_FPCR, processor_state, fpcr); #ifdef CONFIG_HIBERNATION - OFFSET(PSTATE_PCB, processor_state, pcb); + OFFSET(PSTATE_SP, processor_state, sp); #endif - OFFSET(PCB_KSP, pcb_struct, ksp); OFFSET(PBE_ADDR, pbe, address); OFFSET(PBE_ORIG_ADDR, pbe, orig_address); OFFSET(PBE_NEXT, pbe, next); @@ -89,9 +88,6 @@ void foo(void) DEFINE(PT_REGS_R26, offsetof(struct pt_regs, r26)); DEFINE(PT_REGS_R27, offsetof(struct pt_regs, r27)); DEFINE(PT_REGS_R28, offsetof(struct pt_regs, r28)); - DEFINE(PT_REGS_TRAP_A0, offsetof(struct pt_regs, trap_a0)); - DEFINE(PT_REGS_TRAP_A1, offsetof(struct pt_regs, trap_a1)); - DEFINE(PT_REGS_TRAP_A2, offsetof(struct pt_regs, trap_a2)); DEFINE(PT_REGS_PS, offsetof(struct pt_regs, ps)); DEFINE(PT_REGS_PC, offsetof(struct pt_regs, pc)); DEFINE(PT_REGS_GP, offsetof(struct pt_regs, gp)); @@ -222,4 +218,5 @@ void foo(void) OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]); OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]); BLANK(); + DEFINE(ASM_THREAD_SIZE, THREAD_SIZE); } diff --git a/arch/sw_64/kernel/bindvcpu.c b/arch/sw_64/kernel/bindvcpu.c index 611c395c144b69ea5133504214fc18c75065438a..46617eb68b7a3a09e0d264167e91bf615dfcd5c4 100644 --- a/arch/sw_64/kernel/bindvcpu.c +++ b/arch/sw_64/kernel/bindvcpu.c @@ -11,7 +11,8 @@ #include #include -extern bool bind_vcpu_enabled; +__read_mostly bool bind_vcpu_enabled; +EXPORT_SYMBOL(bind_vcpu_enabled); static int __init bind_vcpu_init(void) { diff --git a/arch/sw_64/kernel/clock.c b/arch/sw_64/kernel/clock.c index f31f596a00521e4b8ea193eb8adc69896f272c92..32f01d4b8255507b897873da790b7f98ba241d56 100644 --- a/arch/sw_64/kernel/clock.c +++ b/arch/sw_64/kernel/clock.c @@ -109,14 +109,21 @@ struct clk *sw64_clk_get(struct device *dev, const char *id) } EXPORT_SYMBOL(sw64_clk_get); -unsigned long sw64_clk_get_rate(struct clk *clk) +unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy) { - if (!clk) - return 0; + int i; 
+ u64 val; - return (unsigned long)clk->rate; + val = sw64_io_read(0, CLK_CTL); + val = val >> CORE_PLL2_CFG_SHIFT; + + for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { + if (cpu_freq[val] == cpu_freq[i]) + return cpu_freq[i]; + } + return 0; } -EXPORT_SYMBOL(sw64_clk_get_rate); +EXPORT_SYMBOL(__sw64_cpufreq_get); void sw64_store_policy(struct cpufreq_policy *policy) { @@ -124,15 +131,17 @@ void sw64_store_policy(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(sw64_store_policy); -int sw64_set_rate(int index, unsigned long rate) +void sw64_set_rate(unsigned long rate) { unsigned int i, val; + int index = -1; rate /= 1000000; for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { if (rate == cpu_freq[i]) { index = i; + update_cpu_freq(cpu_freq[i]); break; } } @@ -178,7 +187,5 @@ int sw64_set_rate(int index, unsigned long rate) /* LV1 select PLL0/PLL1 */ sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); - - return index; } EXPORT_SYMBOL_GPL(sw64_set_rate); diff --git a/arch/sw_64/kernel/early_init.c b/arch/sw_64/kernel/early_init.c index 392627bef8bb14708e56860f456ba53fd1bf5866..2f38719cc216ce2c84f0e1dfdd17d977b1324f02 100644 --- a/arch/sw_64/kernel/early_init.c +++ b/arch/sw_64/kernel/early_init.c @@ -23,6 +23,7 @@ static void __init sw64_setup_platform_ops(void) asmlinkage __visible void __init sw64_start_kernel(void) { + fixup_hmcall(); sw64_setup_chip_ops(); sw64_setup_platform_ops(); sw64_platform->ops_fixup(); diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index f79c9a6ddf3692f4c5427c7b83a555ac9d68fd71..67bafd4a930aa3423230644b925e918b883a787a 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -14,11 +14,10 @@ /* * This defines the normal kernel pt-regs layout. * - * regs 9-15 preserved by C code + * regs 9-15 preserved by C code, saving to pt_regs will make + * them easier to be accessed in an unified way. * regs 16-18 saved by HMcode * regs 29-30 saved and set up by HMcode - * JRP - Save regs 16-18 in a special area of the stack, so that - * the hmcode-provided values are available to the signal handler. */ .macro SAVE_COMMON_REGS @@ -42,9 +41,6 @@ stl $25, PT_REGS_R25($sp) stl $26, PT_REGS_R26($sp) stl $27, PT_REGS_R27($sp) - stl $16, PT_REGS_TRAP_A0($sp) - stl $17, PT_REGS_TRAP_A1($sp) - stl $18, PT_REGS_TRAP_A2($sp) .endm .macro RESTORE_COMMON_REGS @@ -384,11 +380,10 @@ $syscall_trace_failed: * Integer register context switch * The callee-saved registers must be saved and restored. * - * a0: physical address of next task's pcb, used by hmcode - * a1: previous task_struct (must be preserved across the switch) - * a2: next task_struct + * a0: previous task_struct (must be preserved across the switch) + * a1: next task_struct * - * The value of a1 must be preserved by this function, as that's how + * The value of a0 must be preserved by this function, as that's how * arguments are passed to schedule_tail. 
*/ .align 4 @@ -397,33 +392,28 @@ $syscall_trace_failed: __switch_to: .prologue 0 /* Save context into prev->thread */ - stl $26, TASK_THREAD_RA($17) - stl $30, TASK_THREAD_SP($17) - stl $9, TASK_THREAD_S0($17) - stl $10, TASK_THREAD_S1($17) - stl $11, TASK_THREAD_S2($17) - stl $12, TASK_THREAD_S3($17) - stl $13, TASK_THREAD_S4($17) - stl $14, TASK_THREAD_S5($17) - stl $15, TASK_THREAD_S6($17) + stl $26, TASK_THREAD_RA($16) + stl $30, TASK_THREAD_SP($16) + stl $9, TASK_THREAD_S0($16) + stl $10, TASK_THREAD_S1($16) + stl $11, TASK_THREAD_S2($16) + stl $12, TASK_THREAD_S3($16) + stl $13, TASK_THREAD_S4($16) + stl $14, TASK_THREAD_S5($16) + stl $15, TASK_THREAD_S6($16) /* Restore context from next->thread */ - ldl $26, TASK_THREAD_RA($18) - ldl $9, TASK_THREAD_S0($18) - ldl $10, TASK_THREAD_S1($18) - ldl $11, TASK_THREAD_S2($18) - ldl $12, TASK_THREAD_S3($18) - ldl $13, TASK_THREAD_S4($18) - ldl $14, TASK_THREAD_S5($18) - ldl $15, TASK_THREAD_S6($18) - sys_call HMC_swpctx - /* - * SP has been saved and restored by HMC_swpctx, - * and restore it again here for future expansion. - */ - ldl $30, TASK_THREAD_SP($18) + ldl $26, TASK_THREAD_RA($17) + ldl $30, TASK_THREAD_SP($17) + ldl $9, TASK_THREAD_S0($17) + ldl $10, TASK_THREAD_S1($17) + ldl $11, TASK_THREAD_S2($17) + ldl $12, TASK_THREAD_S3($17) + ldl $13, TASK_THREAD_S4($17) + ldl $14, TASK_THREAD_S5($17) + ldl $15, TASK_THREAD_S6($17) ldi $8, 0x3fff bic $sp, $8, $8 - mov $17, $0 + mov $16, $0 ret .end __switch_to @@ -436,8 +426,7 @@ __switch_to: .ent ret_from_fork ret_from_fork: ldi $26, ret_from_sys_call - mov $17, $16 - jmp $31, schedule_tail + call $31, schedule_tail .end ret_from_fork /* @@ -447,7 +436,6 @@ ret_from_fork: .globl ret_from_kernel_thread .ent ret_from_kernel_thread ret_from_kernel_thread: - mov $17, $16 call $26, schedule_tail mov $9, $27 mov $10, $16 diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S index 5fff0f33c9e2af000d361f899281bd8f7bc626a6..3dfb95c91d70b061f28439580a695f1d0370a83a 100644 --- a/arch/sw_64/kernel/head.S +++ b/arch/sw_64/kernel/head.S @@ -24,7 +24,7 @@ __start: /* We need to get current_task_info loaded up... */ ldi $8, init_thread_union /* ... and find our stack ... */ - ldi $30, 0x4000 - PT_REGS_SIZE($8) + ldi $30, ASM_THREAD_SIZE($8) /* ... and then we can clear bss data. */ ldi $2, __bss_start ldi $3, __bss_stop @@ -51,7 +51,7 @@ __start: ldl $29, 0($30) addl $29, $0, $29 /* Repoint the sp into the new kernel image */ - ldi $30, 0x4000 - PT_REGS_SIZE($8) + ldi $30, ASM_THREAD_SIZE($8) #endif /* ... and then we can start the kernel. */ call $26, sw64_start_kernel @@ -71,24 +71,20 @@ __smp_callin: br $27, 2f # we copy this from above "br $27 1f" 2: ldgp $29, 0($27) # First order of business, load the GP. - subl $31, 2, $16 + bis $31, $31, $16 # invalidate all TLB with current VPN sys_call HMC_tbi sys_call HMC_whami # Get hard cid - sll $0, 2, $0 ldi $1, __rcid_to_cpu - addl $1, $0, $1 + s4addl $0, $1, $1 ldw $0, 0($1) # Get logical cpu number - sll $0, 3, $0 - ldi $1, tidle_pcb - addl $1, $0, $1 - ldl $16, 0($1) # Get PCBB of idle thread + ldi $2, tidle_ksp + s8addl $0, $2, $2 + ldl $30, 0($2) # Get ksp of idle thread - sys_call HMC_swpctx - ldi $8, 0x3fff # Find "current". 
- bic $30, $8, $8 + ldi $8, -ASM_THREAD_SIZE($30) # Find "current" call $26, smp_callin sys_call HMC_halt diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c index 33426e3ed305a60fdf5fd1088e0c799d946d84ea..0e7e860c507e7576b1bdb895d94a374aa584918e 100644 --- a/arch/sw_64/kernel/hibernate.c +++ b/arch/sw_64/kernel/hibernate.c @@ -14,7 +14,7 @@ void save_processor_state(void) vcb->ksp = rdksp(); vcb->usp = rdusp(); - vcb->pcbb = rdpcbb(); + vcb->tid = rtid(); vcb->ptbr = rdptbr(); } @@ -24,11 +24,10 @@ void restore_processor_state(void) wrksp(vcb->ksp); wrusp(vcb->usp); - wrpcbb(vcb->pcbb); + wrtp(vcb->tid); wrptbr(vcb->ptbr); sflush(); - tbia(); - imb(); + tbiv(); } int swsusp_arch_resume(void) diff --git a/arch/sw_64/kernel/hibernate_asm.S b/arch/sw_64/kernel/hibernate_asm.S index 3acbcdbae0b3df4ce838ff81b91f6ffc73f9e31f..23bab0d6edd87567c02ef218d7d9305e7c4efdc6 100644 --- a/arch/sw_64/kernel/hibernate_asm.S +++ b/arch/sw_64/kernel/hibernate_asm.S @@ -30,8 +30,7 @@ ENTRY(swsusp_arch_suspend) rfpcr $f0 fstd $f0, PSTATE_FPCR($16) - ldi $1, PSTATE_PCB($16) - stl sp, PCB_KSP($1) + stl sp, PSTATE_SP($16) call swsusp_save ldi $16, hibernate_state ldi $1, PSTATE_REGS($16) @@ -112,8 +111,7 @@ $hibernate_setfpec_over: vldd $f8, CALLEE_F8($1) vldd $f9, CALLEE_F9($1) - ldi $1, PSTATE_PCB($16) - ldl sp, PCB_KSP($1) + ldl sp, PSTATE_SP($16) ldi $8, 0x3fff bic sp, $8, $8 diff --git a/arch/sw_64/kernel/hmcall.c b/arch/sw_64/kernel/hmcall.c new file mode 100644 index 0000000000000000000000000000000000000000..b81d7fff1c347d58f3c03935f76d532eb35d093c --- /dev/null +++ b/arch/sw_64/kernel/hmcall.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/hmcall.c + * + * Copyright (C) 2022 WXIAT + * Author: He Sheng + */ + +#include +#include + +#define A0(func) (((HMC_##func & 0xFF) >> 6) & 0x1) +#define A1(func) ((((HMC_##func & 0xFF)>>6) & 0x2) >> 1) +#define A2(func) ((HMC_##func & 0x3F) << 7) + +#define T(func) ((A0(func) ^ A1(func)) & 0x1) +#define B0(func) ((T(func) | A0(func)) << 13) +#define B1(func) (((~T(func) & 1) | A1(func)) << 14) + +#define PRI_BASE 0x10000UL + +#define HMCALL_ENTRY(func) (PRI_BASE | B1(func) | B0(func) | A2(func)) + + +static inline void fixup_rdtp(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(rdtp)); + + entry[0] = 0x181ffec7; /* pri_rcsr $0, CSR__TID */ + entry[1] = 0x1ee00000; /* pri_ret $23 */ +} + +static inline void fixup_wrtp(void) +{ + unsigned int *entry = __va(HMCALL_ENTRY(wrtp)); + + entry[0] = 0x1a1fffc7; /* pri_wcsr $16, CSR__TID */ + entry[1] = 0x1ee00000; /* pri_ret $23 */ +} + +void __init fixup_hmcall(void) +{ +#if defined(CONFIG_SUBARCH_C3A) || defined(CONFIG_SUBARCH_C3B) + fixup_rdtp(); + fixup_wrtp(); +#endif +} + +#undef A0 +#undef A1 +#undef A2 +#undef T +#undef B0 +#undef B1 diff --git a/arch/sw_64/kernel/irq_sw64.c b/arch/sw_64/kernel/irq_sw64.c index 8ab845d153eb15ddf1978e069680757d2cdd4136..88809fa531dd0de199abe83dc48f0b8f4524efc5 100644 --- a/arch/sw_64/kernel/irq_sw64.c +++ b/arch/sw_64/kernel/irq_sw64.c @@ -9,15 +9,6 @@ #include #include -asmlinkage void -do_entInt(unsigned long type, unsigned long vector, - unsigned long irq_arg, struct pt_regs *regs) -{ - local_irq_disable(); - handle_chip_irq(type, vector, irq_arg, regs); -} -EXPORT_SYMBOL(do_entInt); - void __init init_IRQ(void) { diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c index ac2f397f16096b39454c766c48e7054c221474c3..95970b293de0773234137d29b9ca263a15bac8d1 100644 --- a/arch/sw_64/kernel/kgdb.c +++ 
b/arch/sw_64/kernel/kgdb.c @@ -95,7 +95,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "pc", 8, offsetof(struct pt_regs, pc)}, { "", 8, -1 }, - { "unique", 8, -1}, + { "tp", 8, -1}, }; char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c index c9ca7a728bd458f323575ceae67657b379d6d925..950998476cdaced4b7368cb4712a1d7081e11047 100644 --- a/arch/sw_64/kernel/machine_kexec.c +++ b/arch/sw_64/kernel/machine_kexec.c @@ -204,9 +204,6 @@ void machine_kexec(struct kimage *image) pr_info("Will call new kernel at %08lx\n", image->start); pr_info("Bye ...\n"); - //flush_cache_all(); - //sflush(); - //tbia(); smp_wmb(); ((noretfun_t) reboot_code_buffer)(); } diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c index fcc6e0f02a93aa93c7cfad18ff960973c08dbb50..6cc872ba9ca54786c9bd44db3bd4a7f9665afde0 100644 --- a/arch/sw_64/kernel/pci.c +++ b/arch/sw_64/kernel/pci.c @@ -614,7 +614,8 @@ void __init sw64_init_arch(void) cpu_num = sw64_chip->get_cpu_num(); for (node = 0; node < cpu_num; node++) { - set_devint_wken(node); + if (is_in_host()) + set_devint_wken(node); rc_enable = sw64_chip_init->pci_init.get_rc_enable(node); if (rc_enable == 0) { printk("PCIe is disabled on node %ld\n", node); diff --git a/arch/sw_64/kernel/perf_regs.c b/arch/sw_64/kernel/perf_regs.c index 4c12a2cdf912020c4e49df19f4c9fa99e2ffae36..b036f213936bc6d79214c9b7bdf1ab9a82a40b69 100644 --- a/arch/sw_64/kernel/perf_regs.c +++ b/arch/sw_64/kernel/perf_regs.c @@ -28,6 +28,6 @@ u64 perf_reg_abi(struct task_struct *task) void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs) { - regs_user->regs = NULL; - regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); } diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index a75ae20205f3215e92257b6edb8075efcc198d70..e1689d25f77d25300164ae39a6f4d039eae77069 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -52,7 +52,7 @@ void arch_cpu_idle(void) static void common_shutdown_1(void *generic_ptr) { struct halt_info *how = (struct halt_info *)generic_ptr; - int cpuid = smp_processor_id(); + int cpuid __maybe_unused = smp_processor_id(); /* No point in taking interrupts anymore. */ local_irq_disable(); @@ -102,17 +102,6 @@ void machine_power_off(void) } -/* Used by sysrq-p, among others. I don't believe r9-r15 are ever - * saved in the context it's used. - */ - -void -show_regs(struct pt_regs *regs) -{ - show_regs_print_info(KERN_DEFAULT); - dik_show_regs(regs); -} - /* * Re-start a thread when doing execve() */ @@ -136,7 +125,7 @@ flush_thread(void) wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); /* Clean slate for TLS. */ - current_thread_info()->pcb.unique = 0; + current_thread_info()->pcb.tp = 0; } void @@ -146,7 +135,11 @@ release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpstate_save(src); + /* + * aux_save() has to read the current TLS pointer from CSR:TID as it + * may be out-of-sync with the saved value. 
+ */ + aux_save(src); *dst = *src; return 0; } @@ -167,8 +160,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs = current_pt_regs(); - childti->pcb.ksp = (unsigned long) childregs; - childti->pcb.flags = 7; /* set FEN, clear everything else */ p->thread.sp = (unsigned long) childregs; if (unlikely(p->flags & PF_KTHREAD)) { @@ -180,6 +171,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, childti->pcb.usp = 0; return 0; } + /* * Note: if CLONE_SETTLS is not set, then we must inherit the * value from the parent, which will have been set by the block @@ -188,10 +180,11 @@ copy_thread(unsigned long clone_flags, unsigned long usp, * application calling fork. */ if (clone_flags & CLONE_SETTLS) - childti->pcb.unique = tls; + childti->pcb.tp = regs->r20; else regs->r20 = 0; - childti->pcb.usp = usp ?: rdusp(); + if (usp) + childti->pcb.usp = usp; *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; @@ -214,7 +207,7 @@ void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs) dest[i] = *(__u64 *)((void *)regs + regoffsets[i]); dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; dest[31] = regs->pc; - dest[32] = ti->pcb.unique; + dest[32] = ti->pcb.tp; } EXPORT_SYMBOL(sw64_elf_core_copy_regs); diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index 189074f8bd5c7892afe3a1f6720a9322a3d3b5ba..8c31eca3cc3226a60de293ab2bdc16022d414e10 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -7,13 +7,9 @@ #include #include -/* ptrace.c */ -extern int ptrace_set_bpt(struct task_struct *child); -extern int ptrace_cancel_bpt(struct task_struct *child); - /* traps.c */ -extern void dik_show_regs(struct pt_regs *regs); -extern void die_if_kernel(char *str, struct pt_regs *regs, long err); +extern void show_regs(struct pt_regs *regs); +extern void die(char *str, struct pt_regs *regs, long err); /* timer.c */ extern void setup_timer(void); diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index bdbd0d97a130910a170a836b67ee2f82b30ec354..f3bc1020eaffef8954235379f5bb9b36cef0201e 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -72,7 +72,7 @@ short regoffsets[32] = { static int pcboff[] = { [USP] = PCB_OFF(usp), - [UNIQUE] = PCB_OFF(unique), + [TP] = PCB_OFF(tp), [DA_MATCH] = PCB_OFF(da_match), [DA_MASK] = PCB_OFF(da_mask), [DV_MATCH] = PCB_OFF(dv_match), @@ -154,119 +154,12 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) return 0; } -static inline int -read_int(struct task_struct *task, unsigned long addr, int *data) -{ - int copied = access_process_vm(task, addr, data, sizeof(int), FOLL_FORCE); - - return (copied == sizeof(int)) ? 0 : -EIO; -} - -static inline int -write_int(struct task_struct *task, unsigned long addr, int data) -{ - int copied = access_process_vm(task, addr, &data, sizeof(int), - FOLL_FORCE | FOLL_WRITE); - return (copied == sizeof(int)) ? 0 : -EIO; -} - -/* - * Set breakpoint. 
- */ -int -ptrace_set_bpt(struct task_struct *child) -{ - int displ, i, res, reg_b, nsaved = 0; - unsigned int insn, op_code; - unsigned long pc; - - pc = get_reg(child, REG_PC); - res = read_int(child, pc, (int *)&insn); - if (res < 0) - return res; - - op_code = insn >> 26; - /* br bsr beq bne blt ble bgt bge blbc blbs fbeq fbne fblt fble fbgt fbge */ - if ((1UL << op_code) & 0x3fff000000000030UL) { - /* - * It's a branch: instead of trying to figure out - * whether the branch will be taken or not, we'll put - * a breakpoint at either location. This is simpler, - * more reliable, and probably not a whole lot slower - * than the alternative approach of emulating the - * branch (emulation can be tricky for fp branches). - */ - displ = ((s32)(insn << 11)) >> 9; - task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; - if (displ) /* guard against unoptimized code */ - task_thread_info(child)->bpt_addr[nsaved++] - = pc + 4 + displ; - /*call ret jmp*/ - } else if (op_code >= 0x1 && op_code <= 0x3) { - reg_b = (insn >> 16) & 0x1f; - task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b); - } else { - task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; - } - - /* install breakpoints: */ - for (i = 0; i < nsaved; ++i) { - res = read_int(child, task_thread_info(child)->bpt_addr[i], - (int *)&insn); - if (res < 0) - return res; - task_thread_info(child)->bpt_insn[i] = insn; - res = write_int(child, task_thread_info(child)->bpt_addr[i], - BREAKINST); - if (res < 0) - return res; - } - task_thread_info(child)->bpt_nsaved = nsaved; - return 0; -} - /* - * Ensure no single-step breakpoint is pending. Returns non-zero - * value if child was being single-stepped. - */ -int -ptrace_cancel_bpt(struct task_struct *child) -{ - int i, nsaved = task_thread_info(child)->bpt_nsaved; - - task_thread_info(child)->bpt_nsaved = 0; - - if (nsaved > 2) { - printk("%s: bogus nsaved: %d!\n", __func__, nsaved); - nsaved = 2; - } - - for (i = 0; i < nsaved; ++i) { - write_int(child, task_thread_info(child)->bpt_addr[i], - task_thread_info(child)->bpt_insn[i]); - } - return (nsaved != 0); -} - -void user_enable_single_step(struct task_struct *child) -{ - /* Mark single stepping. */ - task_thread_info(child)->bpt_nsaved = -1; -} - -void user_disable_single_step(struct task_struct *child) -{ - ptrace_cancel_bpt(child); -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. 
+ * Called by ptrace_detach */ void ptrace_disable(struct task_struct *child) { - user_disable_single_step(child); + /**/ } static int gpr_get(struct task_struct *target, @@ -487,7 +380,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r case MMCSR__DA_MATCH: case MMCSR__DV_MATCH: case MMCSR__DAV_MATCH: - dik_show_regs(regs); + show_regs(regs); if (!(current->ptrace & PT_PTRACED)) { printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm); @@ -611,10 +504,6 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(r26), REG_OFFSET_NAME(r27), REG_OFFSET_NAME(r28), - REG_OFFSET_NAME(hae), - REG_OFFSET_NAME(trap_a0), - REG_OFFSET_NAME(trap_a1), - REG_OFFSET_NAME(trap_a2), REG_OFFSET_NAME(ps), REG_OFFSET_NAME(pc), REG_OFFSET_NAME(gp), diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 0e93643539d32c770a9b7b07f23a469475fa0b47..2d2a8c6d4b4e6eea55b91e84b5931758a7c05527 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -28,9 +28,10 @@ #include #include -#include #include #include +#include +#include #include "proto.h" #include "pci_impl.h" @@ -137,6 +138,14 @@ struct screen_info screen_info = { }; EXPORT_SYMBOL(screen_info); +/* + * Move global data into per-processor storage. + */ +void store_cpu_data(int cpu) +{ + cpu_data[cpu].last_asn = ASN_FIRST_VERSION; +} + #ifdef CONFIG_KEXEC void *kexec_control_page; @@ -859,13 +868,12 @@ setup_arch(char **cmdline_p) /* Default root filesystem to sda2. */ ROOT_DEV = Root_SDA2; - /* - * Identify the flock of penguins. - */ - #ifdef CONFIG_SMP setup_smp(); +#else + store_cpu_data(0); #endif + #ifdef CONFIG_NUMA cpu_set_node(); #endif diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index 6a6203ccb04f489ef0b6b1bbf59b4635c3f88d50..32c9484d2aa2e3f05d0d48957f5acc197159ebd1 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -38,6 +38,36 @@ SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask) return res; } +SYSCALL_DEFINE3(odd_sigaction, int, sig, + const struct odd_sigaction __user *, act, + struct odd_sigaction __user *, oact) +{ + struct k_sigaction new_ka, old_ka; + old_sigset_t mask; + int ret; + + if (act) { + if (!access_ok(act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + /* * Do a signal return; undo the signal stack. 
*/ @@ -133,11 +163,6 @@ do_sigreturn(struct sigcontext __user *sc) if (restore_sigcontext(sc, regs)) goto give_sigsegv; - /* Send SIGTRAP if we're single-stepping: */ - if (ptrace_cancel_bpt(current)) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)regs->pc, 0); - } return; give_sigsegv: @@ -164,11 +189,6 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) if (restore_altstack(&frame->uc.uc_stack)) goto give_sigsegv; - /* Send SIGTRAP if we're single-stepping: */ - if (ptrace_cancel_bpt(current)) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)regs->pc, 0); - } return; give_sigsegv: @@ -235,10 +255,6 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, offsetof(struct user_fpsimd_state, fpcr)); err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr); - err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); - err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); - err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2); - return err; } @@ -351,19 +367,15 @@ syscall_restart(unsigned long r0, unsigned long r19, static void do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) { - unsigned long single_stepping = ptrace_cancel_bpt(current); struct ksignal ksig; /* This lets the debugger run, ... */ if (get_signal(&ksig)) { - /* ... so re-check the single stepping. */ - single_stepping |= ptrace_cancel_bpt(current); /* Whee! Actually deliver the signal. */ if (r0) syscall_restart(r0, r19, regs, &ksig.ka); handle_signal(&ksig, regs); } else { - single_stepping |= ptrace_cancel_bpt(current); if (r0) { switch (regs->r0) { case ERESTARTNOHAND: @@ -383,8 +395,6 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) } restore_saved_sigmask(); } - if (single_stepping) - ptrace_set_bpt(current); /* re-set breakpoint */ } void diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index fb915d1660691e19d92b9dd648a932b978eed6a7..c0936d119c4ed62c19b43fe37a295a72a27271b3 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -34,7 +34,7 @@ EXPORT_SYMBOL(__cpu_to_rcid); int __rcid_to_cpu[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__rcid_to_cpu); -unsigned long tidle_pcb[NR_CPUS]; +void *tidle_ksp[NR_CPUS]; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -59,29 +59,6 @@ EXPORT_SYMBOL(smp_num_cpus); #define send_sleep_interrupt(cpu) send_ipi((cpu), II_SLEEP) #define send_wakeup_interrupt(cpu) send_ipi((cpu), II_WAKE) -/* - * Called by both boot and secondaries to move global data into - * per-processor storage. - */ -static inline void __init -smp_store_cpu_info(int cpuid) -{ - cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy; - cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; - cpu_data[cpuid].need_new_asn = 0; - cpu_data[cpuid].asn_lock = 0; -} - -/* - * Ideally sets up per-cpu profiling hooks. Doesn't do much now... - */ -static inline void __init -smp_setup_percpu_timer(int cpuid) -{ - setup_timer(); - cpu_data[cpuid].prof_counter = 1; - cpu_data[cpuid].prof_multiplier = 1; -} static void __init wait_boot_cpu_to_stop(int cpuid) { @@ -128,11 +105,13 @@ void smp_callin(void) wrent(entInt, 0); /* Get our local ticker going. */ - smp_setup_percpu_timer(cpuid); + setup_timer(); /* All kernel threads share the same mm context. 
*/ mmgrab(&init_mm); current->active_mm = &init_mm; + /* update csr:ptbr */ + wrptbr(virt_to_phys(init_mm.pgd)); /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); @@ -176,23 +155,11 @@ static inline void set_secondary_ready(int cpuid) */ static int secondary_cpu_start(int cpuid, struct task_struct *idle) { - struct pcb_struct *ipcb; unsigned long timeout; - - ipcb = &task_thread_info(idle)->pcb; - /* - * Initialize the idle's PCB to something just good enough for - * us to get started. Immediately after starting, we'll swpctx - * to the target idle task's pcb. Reuse the stack in the mean - * time. Precalculate the target PCBB. + * Precalculate the target ksp. */ - ipcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; - ipcb->usp = 0; - ipcb->pcc = 0; - ipcb->asn = 0; - tidle_pcb[cpuid] = ipcb->unique = virt_to_phys(ipcb); - ipcb->dv_match = ipcb->dv_mask = 0; + tidle_ksp[cpuid] = idle->stack + THREAD_SIZE; DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state); @@ -298,7 +265,7 @@ void __init setup_smp(void) __cpu_to_rcid[num] = i; __rcid_to_cpu[i] = num; set_cpu_possible(num, true); - smp_store_cpu_info(num); + store_cpu_data(num); if (!cpumask_test_cpu(i, &cpu_offline)) set_cpu_present(num, true); num++; @@ -407,18 +374,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) void __init native_smp_cpus_done(unsigned int max_cpus) { - int cpu; - unsigned long bogosum = 0; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_online(cpu)) - bogosum += cpu_data[cpu].loops_per_jiffy; - smp_booted = 1; - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), - (bogosum + 2500) / (500000/HZ), - ((bogosum + 2500) / (5000/HZ)) % 100); + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); } int setup_profiling_timer(unsigned int multiplier) @@ -519,22 +476,9 @@ void native_send_call_func_single_ipi(int cpu) send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } -static void -ipi_imb(void *ignored) -{ - imb(); -} - -void smp_imb(void) -{ - /* Must wait other processors to flush their icache before continue. 
*/ - on_each_cpu(ipi_imb, NULL, 1); -} -EXPORT_SYMBOL(smp_imb); - static void ipi_flush_tlb_all(void *ignored) { - tbia(); + tbiv(); } void flush_tlb_all(void) @@ -545,8 +489,6 @@ void flush_tlb_all(void) on_each_cpu(ipi_flush_tlb_all, NULL, 1); } -#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) - static void ipi_flush_tlb_mm(void *x) { struct mm_struct *mm = (struct mm_struct *) x; @@ -651,50 +593,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l } EXPORT_SYMBOL(flush_tlb_range); -static void ipi_flush_icache_page(void *x) -{ - struct mm_struct *mm = (struct mm_struct *) x; - - if (mm == current->mm) - __load_new_mm_context(mm); - else - flush_tlb_other(mm); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - struct mm_struct *mm = vma->vm_mm; - - if ((vma->vm_flags & VM_EXEC) == 0) - return; - if (!icache_is_vivt_no_ictag()) - return; - - preempt_disable(); - - if (mm == current->mm) { - __load_new_mm_context(mm); - if (atomic_read(&mm->mm_users) == 1) { - int cpu, this_cpu = smp_processor_id(); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu) || cpu == this_cpu) - continue; - if (mm->context.asid[cpu]) - mm->context.asid[cpu] = 0; - } - preempt_enable(); - return; - } - } else - flush_tlb_other(mm); - - smp_call_function(ipi_flush_icache_page, mm, 1); - - preempt_enable(); -} - int native_cpu_disable(void) { int cpu = smp_processor_id(); diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c index 369bc1e19b85713cb0ebe9a0719fd3a7a68ec358..994d8e245878797917f75a1e1630cc0ff1d09529 100644 --- a/arch/sw_64/kernel/suspend.c +++ b/arch/sw_64/kernel/suspend.c @@ -33,6 +33,7 @@ void sw64_suspend_enter(void) */ disable_local_timer(); + current_thread_info()->pcb.tp = rtid(); #ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE sw64_suspend_deep_sleep(&suspend_state); @@ -40,6 +41,7 @@ void sw64_suspend_enter(void) mtinten(); asm("halt"); #endif + wrtp(current_thread_info()->pcb.tp); disable_local_timer(); } diff --git a/arch/sw_64/kernel/syscalls/syscall.tbl b/arch/sw_64/kernel/syscalls/syscall.tbl index 42a179422b6b22fbe14fa10237dfb93ba6dc9261..35d108b49a61ca7ac1058272e89dc2ecacf61923 100644 --- a/arch/sw_64/kernel/syscalls/syscall.tbl +++ b/arch/sw_64/kernel/syscalls/syscall.tbl @@ -163,7 +163,7 @@ #153 is unused #154 is unused #155 is unused -156 common sigaction sys_sigaction +156 common sigaction sys_odd_sigaction #157 is unused #158 is unused #159 is unused diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index 15035a01e48a8856b73b40d2b2349f38d19223e1..3aa55c886e384a8f6d57ec2ee26312321d38560f 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -4,6 +4,9 @@ #include #include #include +#ifndef CONFIG_SMP +#include +#endif #include @@ -93,10 +96,6 @@ void setup_clocksource(void) } #endif /* !CONFIG_SMP */ -void __init common_init_rtc(void) -{ - setup_timer(); -} void __init time_init(void) @@ -111,15 +110,9 @@ time_init(void) setup_clocksource(); of_clk_init(NULL); /* Startup the timer source. 
*/ - common_init_rtc(); -} - -void calibrate_delay(void) -{ - loops_per_jiffy = get_cpu_freq() / HZ; - pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy / (500000 / HZ), - (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + setup_timer(); + /* Calibrate the delay loop directly */ + lpj_fine = cycle_freq / HZ; } static void __init calibrate_sched_clock(void) diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 4e95cab13daafa120daad777a81c3811db70f739..f01b88e53ff2dfacbb1b0490c98f5ca6780a2695 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -29,8 +31,18 @@ #include "proto.h" -void dik_show_regs(struct pt_regs *regs) +enum SW64_IF_TYPES { + IF_BREAKPOINT = 0, + IF_RESERVED, + IF_GENTRAP, + IF_FEN, + IF_OPDEC, +}; + +void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); printk("pc is at %pSR\n", (void *)regs->pc); @@ -60,8 +72,7 @@ void dik_show_regs(struct pt_regs *regs) printk("gp = %016lx sp = %p\n", regs->gp, regs+1); } -static void -dik_show_code(unsigned int *pc) +static void show_code(unsigned int *pc) { long i; unsigned int insn; @@ -75,33 +86,43 @@ dik_show_code(unsigned int *pc) printk("\n"); } -void die_if_kernel(char *str, struct pt_regs *regs, long err) +static DEFINE_SPINLOCK(die_lock); + +void die(char *str, struct pt_regs *regs, long err) { - if (regs->ps & 8) - return; -#ifdef CONFIG_SMP - printk("CPU %d ", hard_smp_processor_id()); -#endif - printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); - dik_show_regs(regs); - add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + static int die_counter; + unsigned long flags; + int ret; + + oops_enter(); + + spin_lock_irqsave(&die_lock, flags); + console_verbose(); + bust_spinlocks(1); + + pr_emerg("%s [#%d]\n", str, ++die_counter); + + ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); + + print_modules(); + show_regs(regs); + show_code((unsigned int *)regs->pc); show_stack(current, NULL, KERN_EMERG); - dik_show_code((unsigned int *)regs->pc); - if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { - printk("die_if_kernel recursion detected.\n"); - local_irq_enable(); - while (1) - asm("nop"); - } + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irqrestore(&die_lock, flags); + oops_exit(); if (kexec_should_crash(current)) crash_kexec(regs); - + if (in_interrupt()) + panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + if (ret != NOTIFY_STOP) + do_exit(SIGSEGV); } #ifndef CONFIG_MATHEMU @@ -135,11 +156,17 @@ do_entArith(unsigned long summary, unsigned long write_mask, if (si_code == 0) return; } - die_if_kernel("Arithmetic fault", regs, 0); + + if (!user_mode(regs)) + die("Arithmetic fault", regs, 0); force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0); } +/* + * BPT/GENTRAP/OPDEC make regs->pc = exc_pc + 4. debugger should + * do something necessary to handle it correctly. 
+ */ asmlinkage void do_entIF(unsigned long inst_type, struct pt_regs *regs) { @@ -149,35 +176,23 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) type = inst_type & 0xffffffff; inst = inst_type >> 32; - if ((regs->ps & ~IPL_MAX) == 0 && type != 4) { - if (type == 1) { - const unsigned int *data - = (const unsigned int *) regs->pc; - printk("Kernel bug at %s:%d\n", - (const char *)(data[1] | (long)data[2] << 32), - data[0]); - } else if (type == 0) { + if (!user_mode(regs) && type != IF_OPDEC) { + if (type == IF_BREAKPOINT) { /* support kgdb */ notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP); return; } - die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), + die((type == IF_RESERVED ? "Kernel Bug" : "Instruction fault"), regs, type); } switch (type) { - case 0: /* breakpoint */ - if (ptrace_cancel_bpt(current)) - regs->pc -= 4; /* make pc point to former bpt */ - + case IF_BREAKPOINT: /* gdb do pc-4 for sigtrap */ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0); return; - case 1: /* bugcheck */ - force_sig_fault(SIGTRAP, TRAP_UNK, (void __user *)regs->pc, 0); - return; - - case 2: /* gentrap */ + case IF_GENTRAP: + regs->pc -= 4; switch ((long)regs->r16) { case GEN_INTOVF: signo = SIGFPE; @@ -230,6 +245,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) case GEN_SUBRNG6: case GEN_SUBRNG7: default: + regs->pc += 4; signo = SIGTRAP; code = TRAP_UNK; break; @@ -238,7 +254,11 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) force_sig_fault(signo, code, (void __user *)regs->pc, regs->r16); return; - case 4: /* opDEC */ + case IF_FEN: + fpu_enable(); + return; + + case IF_OPDEC: switch (inst) { case BREAK_KPROBE: if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) @@ -253,27 +273,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; } - if ((regs->ps & ~IPL_MAX) == 0) - die_if_kernel("Instruction fault", regs, type); - break; - - case 3: /* FEN fault */ - /* - * Irritating users can call HMC_clrfen to disable the - * FPU for the process. The kernel will then trap to - * save and restore the FP registers. - * Given that GCC by default generates code that uses the - * FP registers, HMC_clrfen is not useful except for DoS - * attacks. So turn the bleeding FPU back on and be done - * with it. - */ - current_thread_info()->pcb.flags |= 1; - __reload_thread(¤t_thread_info()->pcb); - return; + if (user_mode(regs)) + regs->pc -= 4; + else + die("Instruction fault", regs, type); + break; - case 5: /* illoc */ default: /* unexpected instruction-fault type */ + regs->pc -= 4; break; } @@ -490,21 +498,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, * Since the registers are in a weird format, dump them ourselves. 
*/ - printk("%s(%d): unhandled unaligned exception\n", - current->comm, task_pid_nr(current)); - - dik_show_regs(regs); - dik_show_code((unsigned int *)pc); - show_stack(current, NULL, KERN_EMERG); - - if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { - printk("die_if_kernel recursion detected.\n"); - local_irq_enable(); - while (1) - asm("nop"); - } - do_exit(SIGSEGV); - + die("Unhandled unaligned exception", regs, error); } /* diff --git a/arch/sw_64/kernel/vmlinux.lds.S b/arch/sw_64/kernel/vmlinux.lds.S index a106be42121f58d8b6af8b8776944fe946b85a75..07bc3d8ee7e47bcc98c2c3de0635ab1f152b3662 100644 --- a/arch/sw_64/kernel/vmlinux.lds.S +++ b/arch/sw_64/kernel/vmlinux.lds.S @@ -33,7 +33,7 @@ SECTIONS } :text _etext = .; /* End of text section */ - RO_DATA(4096) + RO_DATA(PAGE_SIZE) /* Will be freed after init */ __init_begin = ALIGN(PAGE_SIZE); diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig index 85323b48f56438f9e237ccf57a9f308cd8d20e1a..4b6201ff5dc80bacfe731c0d3469d847caeaef62 100644 --- a/arch/sw_64/kvm/Kconfig +++ b/arch/sw_64/kvm/Kconfig @@ -44,7 +44,7 @@ config KVM_SW64_HOST config KVM_MEMHOTPLUG bool "Memory hotplug support for guest" - depends on KVM + depends on KVM && MEMORY_HOTPLUG help Provides memory hotplug support for SW64 guest. diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index de81f7efe01a5fa219a4aacc3664c6cefed86704..825fe39f0494d61bb8023d72b07b4c8fb6ca74da 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -21,7 +21,9 @@ bool set_msi_flag; unsigned long sw64_kvm_last_vpn[NR_CPUS]; -__read_mostly bool bind_vcpu_enabled; +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA) +extern bool bind_vcpu_enabled; +#endif #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] #ifdef CONFIG_SUBARCH_C3B @@ -306,6 +308,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE) return 0; + if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) + return 0; + + if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr))) + return 0; + #ifndef CONFIG_KVM_MEMHOTPLUG if (mem->guest_phys_addr) { pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__); @@ -313,12 +321,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } #endif - if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) - return 0; - - if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr))) - return 0; - if (!sw64_kvm_pool) return -ENOMEM; @@ -409,6 +411,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long addr = vcpu->kvm->arch.host_phys_addr; + hrtimer_cancel(&vcpu->arch.hrt); vcpu->arch.vcb.whami = vcpu->vcpu_id; vcpu->arch.vcb.vcpu_irq_disabled = 1; vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ @@ -539,6 +542,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->arch.vcb.vpcr = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA) if (unlikely(bind_vcpu_enabled)) { int nid; unsigned long end; @@ -548,11 +552,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (pfn_to_nid(PHYS_PFN(end)) == nid) set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]); } -#else +#endif +#else /* !CONFIG_KVM_MEMHOTPLUG */ unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd); vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0); -#endif +#endif /* CONFIG_KVM_MEMHOTPLUG */ vcpu->arch.vcb.upcr = 0x7; } diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c index 
742dd63cdb702c5980adc5aa9cec898948105303..1a8c18757e095f289d4bed109d37cd4b6c0f2dbb 100644 --- a/arch/sw_64/lib/csum_partial_copy.c +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -61,10 +61,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - if (likely(!uaccess_kernel())) - err = __copy_from_user(dst, src, len + 8); - else - memcpy(dst, src, len + 8); + err = __copy_from_user(dst, src, len+8); while (len > 0) { word = *dst; @@ -93,10 +90,7 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - if (likely(!uaccess_kernel())) - err = __copy_from_user(dst, src, len + 8); - else - memcpy(dst, src, len + 8); + err = __copy_from_user(dst, src, len+8); dst = (unsigned long *)((unsigned long)dst & (~7UL)); word = *dst; diff --git a/arch/sw_64/lib/deep-copy_template.S b/arch/sw_64/lib/deep-copy_template.S new file mode 100644 index 0000000000000000000000000000000000000000..7705eb3f36d4edab9e09c49bafbe3129086b6252 --- /dev/null +++ b/arch/sw_64/lib/deep-copy_template.S @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * template for memcpy and copy_user with SIMD + * + * $4: 8-byte misalignment of src when dest is 8-byte aligned + * $5: 32-byte misalignment of src when dest is 32-byte aligned + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop + * $16: latest dest, clobbered + * $17: latest src, clobbered + * $18: bytes left to copy + * + */ + +#define NC_STORE_THRESHOLD 2048 + +#define SAVE_SIMD_REGS \ + ldi $sp, -0x60($sp); \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vstd $f1, 0($23); \ + vstd $f2, 0x20($23); \ + ldi $7, 1 + +#define RESTORE_SIMD_REGS \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vldd $f1, 0($23); \ + vldd $f2, 0x20($23); \ + ldi $sp, 0x60($sp); \ + bis $31, $31, $7 + +#define SAVE_SIMD_U_REGS \ + ldi $sp, -0xc0($sp); \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vstd $f1, 0($23); \ + vstd $f2, 0x20($23); \ + vstd $f4, 0x40($23); \ + vstd $f5, 0x60($23); \ + vstd $f3, 0x80($23); \ + ldi $7, 2 + +#define RESTORE_SIMD_U_REGS \ + addl $sp, 0x1f, $23; \ + bic $23, 0x1f, $23; \ + vldd $f1, 0($23); \ + vldd $f2, 0x20($23); \ + vldd $f4, 0x40($23); \ + vldd $f5, 0x60($23); \ + vldd $f3, 0x80($23); \ + ldi $sp, 0xc0($sp); \ + bis $31, $31, $7 + + ble $18, $out + and $16, 7, $1 + beq $1, $dest_aligned_8 + +$byte_loop_head: + FIXUP_LDST( ldbu $2, 0($17) ) + FIXUP_LDST( stb $2, 0($16) ) + subl $18, 1, $18 + addl $17, 1, $17 + addl $16, 1, $16 + ble $18, $out + and $16, 7, $1 + bne $1, $byte_loop_head + +$dest_aligned_8: + and $17, 7, $4 + cmplt $18, 16, $1 + bne $1, $quad_loop_end + and $16, 31, $1 + beq $1, $dest_aligned_32 + cmplt $18, 64, $1 + bne $1, $simd_end + bne $4, $quad_u_loop_head + +$quad_loop_head: + FIXUP_LDST( ldl $2, 0($17) ) + FIXUP_LDST( stl $2, 0($16) ) + addl $16, 8, $16 + addl $17, 8, $17 + subl $18, 8, $18 + and $16, 31, $1 + beq $1, $dest_aligned_32 + br $31, $quad_loop_head + +$dest_aligned_32: + cmplt $18, 64, $1 + bne $1, $simd_end + and $17, 31, $5 + bne $5, $prep_simd_u_loop + +$prep_simd_loop: + SAVE_SIMD_REGS + ldi $1, NC_STORE_THRESHOLD($31) + cmple $18, $1, $1 + bne $1, $simd_loop + + .align 4 +$simd_loop_nc: + FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vldd $f2, 32($17) ) + FIXUP_LDST( vstd_nc $f1, 0($16) ) + FIXUP_LDST( vstd_nc $f2, 32($16) ) + subl $18, 64, $18 + addl $17, 64, $17 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_loop_nc + memb # required for _nc 
store instructions + br $31, $simd_loop_end + + .align 4 +$simd_loop: + FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vldd $f2, 32($17) ) + FIXUP_LDST( vstd $f1, 0($16) ) + FIXUP_LDST( vstd $f2, 32($16) ) + subl $18, 64, $18 + addl $17, 64, $17 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_loop + +$simd_loop_end: + cmplt $18, 32, $1 + bne $1, $no_more_simd + FIXUP_LDST( vldd $f1, 0($17) ) + FIXUP_LDST( vstd $f1, 0($16) ) + subl $18, 32, $18 + addl $17, 32, $17 + addl $16, 32, $16 + +$no_more_simd: + RESTORE_SIMD_REGS + +$simd_end: + ble $18, $out + cmplt $18, 16, $1 + bne $1, $quad_loop_end + bne $4, $prep_quad_u_loop_tail + + .align 4 +$quad_loop_tail: + FIXUP_LDST( ldl $2, 0($17) ) + FIXUP_LDST( ldl $3, 8($17) ) + FIXUP_LDST( stl $2, 0($16) ) + FIXUP_LDST( stl $3, 8($16) ) + subl $18, 16, $18 + addl $17, 16, $17 + addl $16, 16, $16 + cmplt $18, 16, $1 + beq $1, $quad_loop_tail + +$quad_loop_end: + ble $18, $out + cmplt $18, 8, $1 + bne $1, $byte_loop_tail + bne $4, $move_one_quad_u + +$move_one_quad: + FIXUP_LDST( ldl $2, 0($17) ) + FIXUP_LDST( stl $2, 0($16) ) + subl $18, 8, $18 + addl $17, 8, $17 + addl $16, 8, $16 + ble $18, $out + + .align 3 +$byte_loop_tail: + FIXUP_LDST( ldbu $2, 0($17) ) + FIXUP_LDST( stb $2, 0($16) ) + subl $18, 1, $18 + addl $17, 1, $17 + addl $16, 1, $16 + bgt $18, $byte_loop_tail + br $31, $out + +/* misaligned src and dst */ +$quad_u_loop_head: + FIXUP_LDST( ldl_u $2, 0($17) ) + FIXUP_LDST( ldl_u $3, 7($17) ) + extll $2, $4, $2 + exthl $3, $4, $3 + bis $2, $3, $2 + FIXUP_LDST( stl $2, 0($16) ) + addl $16, 8, $16 + addl $17, 8, $17 + subl $18, 8, $18 + and $16, 31, $1 + beq $1, $dest_aligned_32 + br $31, $quad_u_loop_head + +$prep_simd_u_loop: + SAVE_SIMD_U_REGS + andnot $17, 31, $3 + ldi $2, 256($31) + sll $5, 3, $1 + subl $2, $1, $2 + sll $1, 29, $1 + sll $2, 29, $2 + ifmovd $1, $f1 + ifmovd $2, $f2 + FIXUP_LDST( vldd $f4, 0($3) ) + ldi $1, NC_STORE_THRESHOLD($31) + cmple $18, $1, $1 + bne $1, $simd_u_loop + + .align 4 +$simd_u_loop_nc: + FIXUP_LDST( vldd $f5, 32($3) ) + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f3, $f4, $f31, $f3 + FIXUP_LDST( vstd_nc $f3, 0($16) ) + FIXUP_LDST( vldd $f4, 64($3) ) + srlow $f5, $f1, $f5 + sllow $f4, $f2, $f3 + vlogfc $f5, $f3, $f31, $f5 + FIXUP_LDST( vstd_nc $f5, 32($16) ) + subl $18, 64, $18 + addl $3, 64, $3 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_u_loop_nc + memb # required for _nc store instructions + br $31, $simd_u_loop_end + + .align 4 +$simd_u_loop: + FIXUP_LDST( vldd $f5, 32($3) ) + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f4, $f3, $f31, $f3 + FIXUP_LDST( vstd $f3, 0($16) ) + FIXUP_LDST( vldd $f4, 64($3) ) + srlow $f5, $f1, $f5 + sllow $f4, $f2, $f3 + vlogfc $f5, $f3, $f31, $f3 + FIXUP_LDST( vstd $f3, 32($16) ) + subl $18, 64, $18 + addl $3, 64, $3 + addl $16, 64, $16 + cmplt $18, 64, $1 + beq $1, $simd_u_loop + +$simd_u_loop_end: + cmplt $18, 32, $1 + bne $1, $no_more_simd_u + FIXUP_LDST( vldd $f5, 32($3) ) + srlow $f4, $f1, $f4 + sllow $f5, $f2, $f3 + vlogfc $f4, $f3, $f31, $f3 + FIXUP_LDST( vstd $f3, 0($16) ) + subl $18, 32, $18 + addl $3, 32, $3 + addl $16, 32, $16 + +$no_more_simd_u: + RESTORE_SIMD_U_REGS + bis $3, $5, $17 + br $31, $simd_end + +$prep_quad_u_loop_tail: + FIXUP_LDST( ldl_u $2, 0($17) ) + .align 4 +$quad_u_loop_tail: + FIXUP_LDST( ldl_u $3, 8($17) ) + extll $2, $4, $22 + exthl $3, $4, $23 + bis $22, $23, $22 + FIXUP_LDST( stl $22, 0($16) ) + FIXUP_LDST( ldl_u $2, 16($17) ) + extll $3, $4, $24 + exthl $2, $4, $25 + bis $24, $25, $24 + FIXUP_LDST( stl 
$24, 8($16) ) + subl $18, 16, $18 + addl $17, 16, $17 + addl $16, 16, $16 + cmplt $18, 16, $1 + beq $1, $quad_u_loop_tail + br $31, $quad_loop_end + +$move_one_quad_u: + FIXUP_LDST( ldl_u $2, 0($17) ) + FIXUP_LDST( ldl_u $3, 8($17) ) + extll $2, $4, $22 + exthl $3, $4, $23 + bis $22, $23, $22 + FIXUP_LDST( stl $22, 0($16) ) + subl $18, 8, $18 + addl $17, 8, $17 + addl $16, 8, $16 + ble $18, $out + br $31, $byte_loop_tail diff --git a/arch/sw_64/lib/deep-copy_user.S b/arch/sw_64/lib/deep-copy_user.S index 631246c68bab476371e95f2c184d0f9b2d13c427..327cab322765ab2f4758812cd178d437726eb44d 100644 --- a/arch/sw_64/lib/deep-copy_user.S +++ b/arch/sw_64/lib/deep-copy_user.S @@ -1,342 +1,43 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $18 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - * - * Inputs: - * length in $18 - * destination address in $16 - * source address in $17 - * return address in $26 - * - * Outputs: - * bytes left to copy in $0 - * - * Clobbers: - * $1,$2,$3,$4,$5,$16,$17 - * - */ -/* Author: Copy_user simd version 1.1 (20190904) by Gao Xiuwu. -*/ #include /* Allow an exception for an insn; exit if we get one. */ -#define EXI(x, y...) \ - 99: x, ##y; \ - .section __ex_table, "a"; \ - .long 99b - .; \ - ldi $31, $exitin-99b($31); \ - .previous - -#define EXO(x,y...) \ - 99: x, ##y; \ +#define FIXUP_LDST(x, y) \ + 99: x, y; \ .section __ex_table, "a"; \ .long 99b - .; \ - ldi $31, $exitout-99b($31); \ + ldi $31, $out-99b($31); \ .previous - .set noat - .align 4 +/* + * $7: SIMD status + * 0: not in simd loop + * 1: in simd loop + * 2: in simd_u loop + * $18: bytes left to copy + * + */ .globl __copy_user .ent __copy_user - __copy_user: .prologue 0 - subl $18, 32, $1 - beq $18, $zerolength - - and $16, 7, $3 - ble $1, $onebyteloop - beq $3, $destaligned - subl $3, 8, $3 -/* - * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) - * This loop aligns the destination a byte at a time - * We know we have at least one trip through this loop - */ -$aligndest: - EXI(ldbu $1, 0($17)) - addl $16, 1, $16 - addl $3, 1, $3 - -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO(stb $1, -1($16)) - addl $17, 1, $17 - subl $18, 1, $18 - bne $3, $aligndest - -/* - * If we fell through into here, we have a minimum of 33 - 7 bytes - * If we arrived via branch, we have a minimum of 32 bytes - */ -$destaligned: - and $17, 7, $1 - bic $18, 7, $4 - #EXI(ldl_u $3, 0($17)) - beq $1, $quadaligned - -#ifndef MISQUAD_SCALAR -$misquad: - and $16, 31, $1 - beq $1, $dest32Baligned - -$align_32B: - EXI(ldbu $1, 0($17)) - addl $17, 1, $17 - EXO(stb $1, 0($16)) - subl $18, 1, $18 - addl $16, 1, $16 - and $16, 31, $1 - beq $18, $exitout - bne $1, $align_32B - -$dest32Baligned: - ldi $2, 256($31) - andnot $17, 31, $3 - EXI(vldd $f10, 0($3)) - and $17, 31, $5 - sll $5, 3, $5 - subw $2, $5, $4 - ifmovs $5, $f15 - ifmovs $4, $f14 - - cmple $18, 63, $1 - bne $1, $misalign_tail_simd - -$misalign_body_simd: - EXI(vldd $f11, 32($3)) - fillcs 128*5($3) - - srlow $f10, $f15, $f12 - sllow $f11, $f14, $f13 - #fillde 128*5($16) - vlogfc $f12, $f13, 
$f31, $f12 - - EXI(vldd $f10, 64($3)) - srlow $f11, $f15, $f22 - sllow $f10, $f14, $f23 - vlogfc $f22, $f23, $f31, $f22 - - EXO(vstd $f12, 0($16)) - EXO(vstd $f22, 32($16)) - - addl $16, 64, $16 - addl $3, 64, $3 - subl $18, 64, $18 - - cmple $18, 63, $1 - beq $1, $misalign_body_simd - br $misalign_tail_simd - -$misalign_tail_simd: - cmple $18, 31, $1 - bne $1, $before_misalign_tail_quads - - EXI(vldd $f11, 32($3)) - srlow $f10, $f15, $f12 - sllow $f11, $f14, $f13 - vlogfc $f12, $f13, $f31, $f12 - - EXO(vstd $f12, 0($16)) - - subl $18, 32, $18 - addl $16, 32, $16 - addl $3, 32, $3 - vfmov $f11, $f10 - -$before_misalign_tail_quads: - srlow $f10, $f15, $f12 - s8subl $18, $4, $1 - ble $1, $tail_quads - - EXI(vldd $f11, 32($3)) - sllow $f11, $f14, $f13 - vlogfc $f12, $f13, $f31, $f12 - -$tail_quads: - subl $18, 8, $1 - blt $1, $less_than_8 - -$move_a_quad: - fimovd $f12, $1 - srlow $f12, 64, $f12 - - EXO(stl $1, 0($16)) - subl $18, 8, $18 - addl $16, 8, $16 - subl $18, 8, $1 - bge $1, $move_a_quad - -$less_than_8: - .align 4 - beq $18, $exitout - fimovd $f12, $1 - -$tail_bytes: - EXO(stb $1, 0($16)) - subl $18, 1, $18 - srl $1, 8, $1 - addl $16, 1, $16 - bgt $18, $tail_bytes - br $exitout -#else - -/* - * In the worst case, we've just executed an ldl_u here from 0($17) - * and we'll repeat it once if we take the branch - */ - -/* Misaligned quadword loop - not unrolled. Leave it that way. */ -$misquad: - EXI(ldl_u $2, 8($17)) - subl $4, 8, $4 - extll $3, $17, $3 - exthl $2, $17, $1 - - bis $3, $1, $1 - EXO(stl $1, 0($16)) - addl $17, 8, $17 - subl $18, 8, $18 - - addl $16, 8, $16 - bis $2, $2, $3 - bne $4, $misquad - - beq $18, $zerolength - -/* We know we have at least one trip through the byte loop */ - EXI(ldbu $2, 0($17)) - addl $16, 1, $16 - br $31, $dirtyentry -#endif -/* Do the trailing byte loop load, then hop into the store part of the loop */ - -/* - * A minimum of (33 - 7) bytes to do a quad at a time. - * Based upon the usage context, it's worth the effort to unroll this loop - * $18 - number of bytes to be moved - * $4 - number of bytes to move as quadwords - * $16 is current destination address - * $17 is current source address - */ - -$quadaligned: - and $16, 31, $1 - beq $1, $quadaligned_dest32Baligned - -$quadaligned_align_32B: - EXI(ldl $1, 0($17)) - addl $17, 8, $17 - EXO(stl $1, 0($16)) - subl $18, 8, $18 - subl $4, 8, $4 - addl $16, 8, $16 - and $16, 31, $1 - beq $4, $onebyteloop - bne $1, $quadaligned_align_32B - -$quadaligned_dest32Baligned: - and $17, 31, $2 - bne $2, $dest32Baligned - -$quad32Bailgned: - subl $4, 64, $2 - blt $2, $onequad - -/* - * There is a significant assumption here that the source and destination - * addresses differ by more than 32 bytes. In this particular case, a - * sparsity of registers further bounds this to be a minimum of 8 bytes. - * But if this isn't met, then the output result will be incorrect. - * Furthermore, due to a lack of available registers, we really can't - * unroll this to be an 8x loop (which would enable us to use the wh64 - * instruction memory hint instruction). 
- */ - -$simd_quadalign_unroll2: - fillcs 128 * 5($17) - EXI(vldd $f22, 0($17)) - EXI(vldd $f23, 32($17)) - EXO(vstd $f22, 0($16)) - EXO(vstd $f23, 32($16)) - #fillde 128 * 5($16) - subl $4, 64, $4 - subl $18, 64, $18 - addl $17, 64, $17 - addl $16, 64, $16 - subl $4, 64, $3 - bge $3, $simd_quadalign_unroll2 - bne $4, $onequad - br $31, $noquads - -$onequad: - EXI(ldl $1, 0($17)) - subl $4, 8, $4 - addl $17, 8, $17 - - EXO(stl $1, 0($16)) - subl $18, 8, $18 - addl $16, 8, $16 - bne $4, $onequad - -$noquads: - beq $18, $zerolength - -/* - * For small copies (or the tail of a larger copy), do a very simple byte loop. - * There's no point in doing a lot of complex alignment calculations to try to - * to quadword stuff for a small amount of data. - * $18 - remaining number of bytes left to copy - * $16 - current dest addr - * $17 - current source addr - */ - -$onebyteloop: - EXI(ldbu $2, 0($17)) - addl $16, 1, $16 - -$dirtyentry: -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO(stb $2, -1($16)) - addl $17, 1, $17 - subl $18, 1, $18 - bgt $18, $onebyteloop - -$zerolength: -$exitout: + bis $31, $31, $7 +#include "deep-copy_template.S" +$out: bis $31, $18, $0 - ret $31, ($26), 1 + beq $7, $return + subl $7, 1, $7 + beq $7, $restore_simd -$exitin: +$restore_simd_u: + RESTORE_SIMD_U_REGS + br $31, $return - /* A stupid byte-by-byte zeroing of the rest of the output - * buffer. This cures security holes by never leaving - * random kernel data around to be copied elsewhere. - */ - - mov $18, $1 - -$101: - EXO(stb $31, 0($16)) - subl $1, 1, $1 - addl $16, 1, $16 - bgt $1, $101 - - bis $31, $18, $0 - ret $31, ($26), 1 +$restore_simd: + RESTORE_SIMD_REGS +$return: + ret .end __copy_user EXPORT_SYMBOL(__copy_user) diff --git a/arch/sw_64/lib/deep-memcpy.S b/arch/sw_64/lib/deep-memcpy.S index 83c726d42778ef7d85758236e9d7cac601b8548d..c4b5bf3d26dfd55be9e701d26a68b9abe6361e93 100644 --- a/arch/sw_64/lib/deep-memcpy.S +++ b/arch/sw_64/lib/deep-memcpy.S @@ -2,307 +2,18 @@ #include -#define NC_STORE_THRESHOLD 2048 +#define FIXUP_LDST(x, y) \ + x, y -#define SAVE_SIMD_REGS \ - ldi $sp, -0x60($sp); \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vstd $f1, 0($23); \ - vstd $f2, 0x20($23) - -#define RESTORE_SIMD_REGS \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vldd $f1, 0($23); \ - vldd $f2, 0x20($23); \ - ldi $sp, 0x60($sp) - -#define SAVE_SIMD_U_REGS \ - ldi $sp, -0x120($sp); \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vstd $f1, 0($23); \ - vstd $f2, 0x20($23); \ - vstd $f4, 0x40($23); \ - vstd $f5, 0x60($23); \ - vstd $f10, 0x80($23); \ - vstd $f11, 0xa0($23); \ - vstd $f20, 0xc0($23); \ - vstd $f21, 0xe0($23) - -#define RESTORE_SIMD_U_REGS \ - addl $sp, 0x1f, $23; \ - bic $23, 0x1f, $23; \ - vldd $f1, 0($23); \ - vldd $f2, 0x20($23); \ - vldd $f4, 0x40($23); \ - vldd $f5, 0x60($23); \ - vldd $f10, 0x80($23); \ - vldd $f11, 0xa0($23); \ - vldd $f20, 0xc0($23); \ - vldd $f21, 0xe0($23); \ - ldi $sp, 0x120($sp) - - .set noat - .align 4 .globl memcpy .ent memcpy memcpy: .frame $30, 0, $26, 0 .prologue 0 - mov $16, $0 - ble $18, $out - and $16, 7, $1 - beq $1, $dest_aligned_8 - - .align 4 -$byte_loop_head: - ldbu $2, 0($17) - subl $18, 1, $18 - addl $17, 1, $17 - stb $2, 0($16) - addl $16, 1, $16 - ble $18, $out - and $16, 7, $1 - bne $1, $byte_loop_head - -$dest_aligned_8: - and $17, 7, $4 - subl $18, 16, $18 - blt $18, $quad_end - subl $18, 64, $18 - blt $18, $simd_end - and $16, 
31, $1 - beq $1, $dest_aligned_32 - bne $4, $quad_u_loop_head - - .align 5 -$quad_loop_head: - ldl $2, 0($17) - subl $18, 8, $18 - addl $17, 8, $17 - stl $2, 0($16) - addl $16, 8, $16 - and $16, 31, $1 - blt $18, $simd_end - beq $16, $dest_aligned_32 - br $31, $quad_loop_head - -$dest_aligned_32: - and $17, 31, $5 - bne $5, $prep_simd_u_loop - -$prep_simd_loop: - SAVE_SIMD_REGS - ldi $1, NC_STORE_THRESHOLD($31) - cmple $18, $1, $1 - bne $1, $simd_loop - - .align 5 -$simd_loop_nc: - fillcs 128 * 5($17) - vldd $f1, 0($17) - vldd $f2, 32($17) - subl $18, 64, $18 - addl $17, 64, $17 - vstd_nc $f1, 0($16) - vstd_nc $f2, 32($16) - addl $16, 64, $16 - bge $18, $simd_loop_nc - memb # required for _nc store instructions - br $31, $simd_loop_end - - .align 5 -$simd_loop: - fillcs 128 * 5($17) - vldd $f1, 0($17) - vldd $f2, 32($17) - subl $18, 64, $18 - addl $17, 64, $17 - vstd $f1, 0($16) - vstd $f2, 32($16) - addl $16, 64, $16 - bge $18, $simd_loop - -$simd_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 - bne $1, $no_more_simd - vldd $f1, 0($17) - subl $18, 32, $18 - addl $17, 32, $17 - vstd $f1, 0($16) - addl $16, 32, $16 - -$no_more_simd: - RESTORE_SIMD_REGS - -$simd_end: - addl $18, 64, $18 - blt $18, $quad_end - bne $4, $prep_quad_u_loop_tail - - .align 4 -$quad_loop_tail: - ldl $2, 0($17) - ldl $3, 8($17) - subl $18, 16, $18 - addl $17, 16, $17 - stl $2, 0($16) - stl $3, 8($16) - addl $16, 16, $16 - bge $18, $quad_loop_tail - -$quad_end: - addl $18, 16, $18 - ble $18, $out - cmplt $18, 8, $1 - bne $1, $byte_loop_tail - bne $4, $move_one_quad_u - -$move_one_quad: - ldl $2, 0($17) - subl $18, 8, $18 - addl $17, 8, $17 - stl $2, 0($16) - addl $16, 8, $16 - ble $18, $out - - .align 4 -$byte_loop_tail: - ldbu $2, 0($17) - subl $18, 1, $18 - addl $17, 1, $17 - stb $2, 0($16) - addl $16, 1, $16 - bgt $18, $byte_loop_tail - +#include "deep-copy_template.S" $out: - ret $31, ($26), 1 - - - - .align 5 -$quad_u_loop_head: - ldl_u $2, 0($17) - ldl_u $3, 7($17) - subl $18, 8, $18 - addl $17, 8, $17 - extll $2, $4, $2 - exthl $3, $4, $3 - bis $2, $3, $2 - stl $2, 0($16) - addl $16, 8, $16 - blt $18, $simd_end - beq $16, $dest_aligned_32 - br $31, $quad_u_loop_head - -$prep_simd_u_loop: - SAVE_SIMD_U_REGS - andnot $17, 31, $3 - ldi $2, 256($31) - sll $5, 3, $1 - subl $2, $1, $2 - sll $1, 29, $1 - sll $2, 29, $2 - ifmovd $1, $f1 - ifmovd $2, $f2 - vldd $f4, 0($3) - ldi $1, NC_STORE_THRESHOLD($31) - cmple $18, $1, $1 - bne $1, $simd_u_loop - - .align 5 -$simd_u_loop_nc: - vldd $f5, 32($3) - fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - vldd $f4, 64($3) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - vstd_nc $f10, 0($16) - vstd_nc $f20, 32($16) - subl $18, 64, $18 - addl $3, 64, $3 - addl $16, 64, $16 - bge $18, $simd_u_loop_nc - memb # required for _nc store instructions - br $31, $simd_u_loop_end - - .align 5 -$simd_u_loop: - vldd $f5, 32($3) - fillcs 128 * 5($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - vldd $f4, 64($3) - srlow $f5, $f1, $f20 - sllow $f4, $f2, $f21 - vlogfc $f20, $f21, $f31, $f20 - vstd $f10, 0($16) - vstd $f20, 32($16) - subl $18, 64, $18 - addl $3, 64, $3 - addl $16, 64, $16 - bge $18, $simd_u_loop - -$simd_u_loop_end: - addl $18, 64, $1 - cmplt $1, 32, $1 - bne $1, $no_more_simd_u - vldd $f5, 32($3) - srlow $f4, $f1, $f10 - sllow $f5, $f2, $f11 - vlogfc $f10, $f11, $f31, $f10 - vstd $f10, 0($16) - subl $18, 32, $18 - addl $3, 32, $3 - addl $16, 32, $16 - 
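The removals above and below strip the old hand-unrolled memcpy; both memcpy and __copy_user now share deep-copy_template.S and differ only in how FIXUP_LDST expands. For the user-copy flavour each load and store gains an __ex_table entry that redirects a fault to $out, where the residual count held in $18 is returned in $0, matching the usual kernel contract that a user-copy primitive returns the number of bytes it could not copy. A minimal sketch of how a caller consumes that contract (the helper name and buffer size are hypothetical, not part of this patch):

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Hypothetical helper: relies only on the standard contract that
 * copy_from_user() returns the number of bytes left uncopied,
 * which is what __copy_user's $out path places in $0.
 */
static long example_read_user_buffer(const void __user *ubuf, size_t len)
{
	char kbuf[64];

	if (len > sizeof(kbuf))
		return -EINVAL;
	if (copy_from_user(kbuf, ubuf, len))
		return -EFAULT;		/* some bytes faulted mid-copy */
	return len;
}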
-$no_more_simd_u: - RESTORE_SIMD_U_REGS - bis $3, $5, $17 - br $31, $simd_end - -$prep_quad_u_loop_tail: - ldl_u $2, 0($17) - .align 5 -$quad_u_loop_tail: - ldl_u $3, 8($17) - extll $2, $4, $22 - exthl $3, $4, $23 - bis $22, $23, $22 - stl $22, 0($16) - ldl_u $2, 16($17) - extll $3, $4, $24 - exthl $2, $4, $25 - bis $24, $25, $24 - stl $24, 8($16) - subl $18, 16, $18 - addl $17, 16, $17 - addl $16, 16, $16 - bge $18, $quad_u_loop_tail - br $31, $quad_end - -$move_one_quad_u: - ldl_u $2, 0($17) - ldl_u $3, 8($17) - subl $18, 8, $18 - addl $17, 8, $17 - extll $2, $4, $22 - exthl $3, $4, $23 - bis $22, $23, $22 - stl $22, 0($16) - addl $16, 8, $16 - ble $18, $out - br $31, $byte_loop_tail - + ret .end memcpy EXPORT_SYMBOL(memcpy) __memcpy = memcpy diff --git a/arch/sw_64/lib/iomap_copy.c b/arch/sw_64/lib/iomap_copy.c index 10e756fffff5ec8a97671ad06d9abe2a9e8f4aaf..1c75bd602d7e7fcd01591cdcbdd73b8e6d258aec 100644 --- a/arch/sw_64/lib/iomap_copy.c +++ b/arch/sw_64/lib/iomap_copy.c @@ -41,15 +41,12 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { -#ifdef CONFIG_64BIT u64 __iomem *dst = to; const u64 *src = from; const u64 *end = src + count; - while (src < end) + while (src < end) { __raw_writeq(*src++, dst++); mb(); -#else - __iowrite32_copy(to, from, count * 2); -#endif + } } diff --git a/arch/sw_64/lib/udelay.c b/arch/sw_64/lib/udelay.c index 48356ab8872f89f6f3fb75189c4fa9760f14b1bc..59ca8a97d748895a49e4fbaf83a85eee99f0d459 100644 --- a/arch/sw_64/lib/udelay.c +++ b/arch/sw_64/lib/udelay.c @@ -28,12 +28,6 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -#ifdef CONFIG_SMP -#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy -#else -#define LPJ loops_per_jiffy -#endif - void udelay(unsigned long usecs) { unsigned long loops = usecs * get_cpu_freq() / 1000000; diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index d596fc50772da73d307540ec66d82481fb5d8a37..574fe7930aacd97d3830d6e3347770f9dab4eac6 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -31,8 +31,8 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) } #endif -extern void die_if_kernel(char *, struct pt_regs *, long); -extern void dik_show_regs(struct pt_regs *regs); +extern void die(char *, struct pt_regs *, long); +extern void show_regs(struct pt_regs *regs); void show_all_vma(void) { @@ -61,31 +61,6 @@ void show_all_vma(void) } } -/* - * Force a new ASN for a task. - */ - -#ifndef CONFIG_SMP -unsigned long last_asn = ASN_FIRST_VERSION; -#endif - -void -__load_new_mm_context(struct mm_struct *next_mm) -{ - unsigned long mmc; - struct pcb_struct *pcb; - - mmc = __get_new_mm_context(next_mm, smp_processor_id()); - next_mm->context.asid[smp_processor_id()] = mmc; - - pcb = ¤t_thread_info()->pcb; - pcb->asn = mmc & HARDWARE_ASN_MASK; - pcb->ptbr = virt_to_pfn(next_mm->pgd); - - __reload_thread(pcb); -} - - /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to handle_mm_fault(). 
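The do_page_fault() hunks below switch from die_if_kernel() to the reworked die(), which reports through notify_die(DIE_OOPS, ...) before killing the task. Assuming nothing else in the series changes the generic notifier API, a standard die notifier can therefore observe sw_64 oopses; the handler below is purely illustrative and not part of this patch:

#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int example_die_handler(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	struct die_args *args = data;

	/* die() raises DIE_OOPS for "Oops" and other fatal kernel traps */
	if (action == DIE_OOPS)
		pr_emerg("sw_64 oops observed, pc=%016lx\n", args->regs->pc);

	return NOTIFY_DONE;
}

static struct notifier_block example_die_nb = {
	.notifier_call = example_die_handler,
};

/* would be registered from an init path: register_die_notifier(&example_die_nb); */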
@@ -301,7 +276,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, */ pr_alert("Unable to handle kernel paging request at virtual address %016lx\n", address); - die_if_kernel("Oops", regs, cause); + die("Oops", regs, cause); do_exit(SIGKILL); /* @@ -332,7 +307,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (unlikely(segv_debug_enabled)) { pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", current->pid, cause, mmcsr, address, regs->pc); - dik_show_regs(regs); + show_regs(regs); show_all_vma(); } diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 82f2414ef7f77f29cac58323185a12d279a6d87e..93ec3ecdf4f1a6593dd22fab13cb3ef674a47b54 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -34,6 +34,7 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); static phys_addr_t mem_start; static phys_addr_t mem_size_limit; +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE unsigned long memory_block_size_bytes(void) { if (is_in_guest()) @@ -41,6 +42,7 @@ unsigned long memory_block_size_bytes(void) else return MIN_MEMORY_BLOCK_SIZE; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static int __init setup_mem_size(char *p) { @@ -75,34 +77,14 @@ pgd_alloc(struct mm_struct *mm) return ret; } -static inline unsigned long -load_PCB(struct pcb_struct *pcb) -{ - register unsigned long sp __asm__("$30"); - pcb->ksp = sp; - return __reload_thread(pcb); -} - /* Set up initial PCB, VPTB, and other such nicities. */ static inline void switch_to_system_map(void) { - unsigned long newptbr; - unsigned long original_pcb_ptr; - - /* - * Initialize the kernel's page tables. Linux puts the vptb in - * the last slot of the L1 page table. - */ memset(swapper_pg_dir, 0, PAGE_SIZE); - newptbr = virt_to_pfn(swapper_pg_dir); - - /* Also set up the real kernel PCB while we're at it. 
*/ - init_thread_info.pcb.ptbr = newptbr; - init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ - original_pcb_ptr = load_PCB(&init_thread_info.pcb); - tbia(); + wrptbr(virt_to_phys(swapper_pg_dir)); + tbiv(); } void __init callback_init(void) diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c index 26769f0bf7bf976a5cee620a4aa77f3f246b15b4..17840f4ef40bea3c26011d239d7c8aa1d5e020fc 100644 --- a/arch/sw_64/mm/physaddr.c +++ b/arch/sw_64/mm/physaddr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include unsigned long __phys_addr(unsigned long x) diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h index 2bf3ca6f3abdb1db1d55ad846a91eba25133061f..2cf5ba5253a84dd34f87e420a59a8d999584d783 100644 --- a/arch/sw_64/net/bpf_jit.h +++ b/arch/sw_64/net/bpf_jit.h @@ -21,80 +21,82 @@ #ifndef _SW64_BPF_JIT_H #define _SW64_BPF_JIT_H +/* SW64 instruction field shift */ #define SW64_BPF_OPCODE_OFFSET 26 #define SW64_BPF_RA_OFFSET 21 #define SW64_BPF_RB_OFFSET 16 #define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13 #define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5 #define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0 +#define SW64_BPF_LS_FUNC_OFFSET 12 -#define SW64_BPF_OPCODE_BR_CALL 0x01 -#define SW64_BPF_OPCODE_BR_RET 0x02 -#define SW64_BPF_OPCODE_BR_JMP 0x03 -#define SW64_BPF_OPCODE_BR_BR 0x04 -#define SW64_BPF_OPCODE_BR_BSR 0x05 -#define SW64_BPF_OPCODE_BR_BEQ 0x30 -#define SW64_BPF_OPCODE_BR_BNE 0x31 -#define SW64_BPF_OPCODE_BR_BLT 0x32 -#define SW64_BPF_OPCODE_BR_BLE 0x33 -#define SW64_BPF_OPCODE_BR_BGT 0x34 -#define SW64_BPF_OPCODE_BR_BGE 0x35 -#define SW64_BPF_OPCODE_BR_BLBC 0x36 -#define SW64_BPF_OPCODE_BR_BLBS 0x37 - -#define SW64_BPF_OPCODE_LS_LDBU 0x20 -#define SW64_BPF_OPCODE_LS_LDHU 0x21 -#define SW64_BPF_OPCODE_LS_LDW 0x22 -#define SW64_BPF_OPCODE_LS_LDL 0x23 -#define SW64_BPF_OPCODE_LS_STB 0x28 -#define SW64_BPF_OPCODE_LS_STH 0x29 -#define SW64_BPF_OPCODE_LS_STW 0x2A -#define SW64_BPF_OPCODE_LS_STL 0x2B -#define SW64_BPF_OPCODE_LS_LDI 0x3E -#define SW64_BPF_OPCODE_LS_LDIH 0x3F - +/* SW64 instruction opcodes */ +#define SW64_BPF_OPCODE_CALL 0x01 +#define SW64_BPF_OPCODE_RET 0x02 +#define SW64_BPF_OPCODE_JMP 0x03 +#define SW64_BPF_OPCODE_BR 0x04 +#define SW64_BPF_OPCODE_BSR 0x05 +#define SW64_BPF_OPCODE_MISC 0x06 +#define SW64_BPF_OPCODE_LOCK 0x08 #define SW64_BPF_OPCODE_ALU_REG 0x10 #define SW64_BPF_OPCODE_ALU_IMM 0x12 +#define SW64_BPF_OPCODE_LDBU 0x20 +#define SW64_BPF_OPCODE_LDHU 0x21 +#define SW64_BPF_OPCODE_LDW 0x22 +#define SW64_BPF_OPCODE_LDL 0x23 +#define SW64_BPF_OPCODE_STB 0x28 +#define SW64_BPF_OPCODE_STH 0x29 +#define SW64_BPF_OPCODE_STW 0x2A +#define SW64_BPF_OPCODE_STL 0x2B +#define SW64_BPF_OPCODE_BEQ 0x30 +#define SW64_BPF_OPCODE_BNE 0x31 +#define SW64_BPF_OPCODE_BLT 0x32 +#define SW64_BPF_OPCODE_BLE 0x33 +#define SW64_BPF_OPCODE_BGT 0x34 +#define SW64_BPF_OPCODE_BGE 0x35 +#define SW64_BPF_OPCODE_BLBC 0x36 +#define SW64_BPF_OPCODE_BLBS 0x37 +#define SW64_BPF_OPCODE_LDI 0x3E +#define SW64_BPF_OPCODE_LDIH 0x3F + +/* SW64 MISC instructions function codes */ +#define SW64_BPF_FUNC_MISC_RD_F 0x1000 +#define SW64_BPF_FUNC_MISC_WR_F 0x1020 +/* SW64 LOCK instructions function codes */ +#define SW64_BPF_FUNC_LOCK_LLDW 0x0 +#define SW64_BPF_FUNC_LOCK_LLDL 0x1 +#define SW64_BPF_FUNC_LOCK_LSTW 0x8 +#define SW64_BPF_FUNC_LOCK_LSTL 0x9 + +/* SW64 ALU instructions function codes */ #define SW64_BPF_FUNC_ALU_ADDW 0x00 #define SW64_BPF_FUNC_ALU_SUBW 0x01 #define SW64_BPF_FUNC_ALU_ADDL 0x08 #define SW64_BPF_FUNC_ALU_SUBL 0x09 #define 
SW64_BPF_FUNC_ALU_MULW 0x10 #define SW64_BPF_FUNC_ALU_MULL 0x18 +#define SW64_BPF_FUNC_ALU_CMPEQ 0x28 +#define SW64_BPF_FUNC_ALU_CMPLT 0x29 +#define SW64_BPF_FUNC_ALU_CMPLE 0x2A +#define SW64_BPF_FUNC_ALU_CMPULT 0x2B +#define SW64_BPF_FUNC_ALU_CMPULE 0x2C +#define SW64_BPF_FUNC_ALU_AND 0x38 +#define SW64_BPF_FUNC_ALU_BIC 0x39 +#define SW64_BPF_FUNC_ALU_BIS 0x3A +#define SW64_BPF_FUNC_ALU_ORNOT 0x3B +#define SW64_BPF_FUNC_ALU_XOR 0x3C +#define SW64_BPF_FUNC_ALU_EQV 0x3D +#define SW64_BPF_FUNC_ALU_SLL 0x48 +#define SW64_BPF_FUNC_ALU_SRL 0x49 +#define SW64_BPF_FUNC_ALU_SRA 0x4A #define SW64_BPF_FUNC_ALU_ZAP 0x68 #define SW64_BPF_FUNC_ALU_ZAPNOT 0x69 #define SW64_BPF_FUNC_ALU_SEXTB 0x6A #define SW64_BPF_FUNC_ALU_SEXTH 0x6B -#define SW64_BPF_OPCODE_BS_REG 0x10 -#define SW64_BPF_OPCODE_BS_IMM 0x12 - -#define SW64_BPF_FUNC_BS_SLL 0x48 -#define SW64_BPF_FUNC_BS_SRL 0x49 -#define SW64_BPF_FUNC_BS_SRA 0x4A - -#define SW64_BPF_OPCODE_LOGIC_REG 0x10 -#define SW64_BPF_OPCODE_LOGIC_IMM 0x12 - -#define SW64_BPF_FUNC_LOGIC_AND 0x38 -#define SW64_BPF_FUNC_LOGIC_BIC 0x39 -#define SW64_BPF_FUNC_LOGIC_BIS 0x3A -#define SW64_BPF_FUNC_LOGIC_ORNOT 0x3B -#define SW64_BPF_FUNC_LOGIC_XOR 0x3C -#define SW64_BPF_FUNC_LOGIC_EQV 0x3D - -#define SW64_BPF_OPCODE_CMP_REG 0x10 -#define SW64_BPF_OPCODE_CMP_IMM 0x12 - -#define SW64_BPF_FUNC_CMP_EQ 0x28 -#define SW64_BPF_FUNC_CMP_LT 0x29 -#define SW64_BPF_FUNC_CMP_LE 0x2A -#define SW64_BPF_FUNC_CMP_ULT 0x2B -#define SW64_BPF_FUNC_CMP_ULE 0x2C - /* special instuction used in jit_fill_hole() */ -#define SW64_BPF_ILLEGAL_INSN ((1 << 25) | 0x80) +#define SW64_BPF_ILLEGAL_INSN (0x1ff00000) /* pri_ret/b $31 */ enum sw64_bpf_registers { SW64_BPF_REG_V0 = 0, /* keep return value */ @@ -135,25 +137,45 @@ enum sw64_bpf_registers { /* SW64 load and store instructions */ #define SW64_BPF_LDBU(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDBU, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDBU, dst, rb, offset16) #define SW64_BPF_LDHU(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDHU, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDHU, dst, rb, offset16) #define SW64_BPF_LDW(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDW, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDW, dst, rb, offset16) #define SW64_BPF_LDL(dst, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDL, dst, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDL, dst, rb, offset16) #define SW64_BPF_STB(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STB, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STB, src, rb, offset16) #define SW64_BPF_STH(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STH, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STH, src, rb, offset16) #define SW64_BPF_STW(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STW, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STW, src, rb, offset16) #define SW64_BPF_STL(src, rb, offset16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STL, src, rb, offset16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STL, src, rb, offset16) #define SW64_BPF_LDI(dst, rb, imm16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDI, dst, rb, imm16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDI, dst, rb, imm16) #define SW64_BPF_LDIH(dst, rb, imm16) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDIH, dst, rb, imm16) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDIH, dst, rb, imm16) + +/* 
SW64 lock instructions */ +#define SW64_BPF_LLDW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDW) +#define SW64_BPF_LLDL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDL) +#define SW64_BPF_LSTW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTW) +#define SW64_BPF_LSTL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTL) +#define SW64_BPF_RD_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_RD_F) +#define SW64_BPF_WR_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_WR_F) /* SW64 ALU instructions REG format */ #define SW64_BPF_ADDW_REG(ra, rb, dst) \ @@ -182,10 +204,10 @@ enum sw64_bpf_registers { ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT) #define SW64_BPF_SEXTB_REG(rb, dst) \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ - 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) #define SW64_BPF_SEXTH_REG(rb, dst) \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ - 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) /* SW64 ALU instructions IMM format */ #define SW64_BPF_ADDW_IMM(ra, imm8, dst) \ @@ -214,130 +236,133 @@ enum sw64_bpf_registers { ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT) #define SW64_BPF_SEXTB_IMM(imm8, dst) \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ - 0, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) +#define SW64_BPF_SEXTH_IMM(imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTH) /* SW64 bit shift instructions REG format */ #define SW64_BPF_SLL_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SLL) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SLL) #define SW64_BPF_SRL_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SRL) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRL) #define SW64_BPF_SRA_REG(src, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ - src, rb, dst, SW64_BPF_FUNC_BS_SRA) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRA) /* SW64 bit shift instructions IMM format */ #define SW64_BPF_SLL_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SLL) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SLL) #define SW64_BPF_SRL_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SRL) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRL) #define SW64_BPF_SRA_IMM(src, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ - src, imm8, dst, SW64_BPF_FUNC_BS_SRA) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRA) /* SW64 control instructions */ #define 
SW64_BPF_CALL(ra, rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_CALL, ra, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_CALL, ra, rb, 0) #define SW64_BPF_RET(rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_RET, SW64_BPF_REG_ZR, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_RET, SW64_BPF_REG_ZR, rb, 0) #define SW64_BPF_JMP(ra, rb) \ - sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_JMP, ra, rb, 0) + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_JMP, ra, rb, 0) #define SW64_BPF_BR(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BR, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR, ra, offset) #define SW64_BPF_BSR(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BSR, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BSR, ra, offset) #define SW64_BPF_BEQ(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BEQ, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BEQ, ra, offset) #define SW64_BPF_BNE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BNE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BNE, ra, offset) #define SW64_BPF_BLT(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLT, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLT, ra, offset) #define SW64_BPF_BLE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLE, ra, offset) #define SW64_BPF_BGT(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGT, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGT, ra, offset) #define SW64_BPF_BGE(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGE, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGE, ra, offset) #define SW64_BPF_BLBC(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBC, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBC, ra, offset) #define SW64_BPF_BLBS(ra, offset) \ - sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBS, ra, offset) + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBS, ra, offset) /* SW64 bit logic instructions REG format */ #define SW64_BPF_AND_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_AND) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_AND) #define SW64_BPF_ANDNOT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIC) -#define SW64_BPF_OR_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIS) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIS) #define SW64_BPF_ORNOT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_ORNOT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ORNOT) #define SW64_BPF_XOR_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_XOR) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_XOR) #define SW64_BPF_EQV_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ - ra, rb, dst, SW64_BPF_FUNC_LOGIC_EQV) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, 
rb, dst, SW64_BPF_FUNC_ALU_EQV) /* SW64 bit logic instructions IMM format */ #define SW64_BPF_AND_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_AND) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_AND) #define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIC) -#define SW64_BPF_OR_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIS) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIS) #define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_ORNOT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ORNOT) #define SW64_BPF_XOR_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_XOR) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_XOR) #define SW64_BPF_EQV_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_LOGIC_EQV) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_EQV) /* SW64 compare instructions REG format */ #define SW64_BPF_CMPEQ_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_EQ) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPEQ) #define SW64_BPF_CMPLT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_LT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLT) #define SW64_BPF_CMPLE_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_LE) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLE) #define SW64_BPF_CMPULT_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_ULT) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULT) #define SW64_BPF_CMPULE_REG(ra, rb, dst) \ - sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ - ra, rb, dst, SW64_BPF_FUNC_CMP_ULE) + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULE) /* SW64 compare instructions imm format */ #define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_EQ) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPEQ) #define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_LT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLT) #define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \ - 
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_LE) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLE) #define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_ULT) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULT) #define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \ - sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ - ra, imm8, dst, SW64_BPF_FUNC_CMP_ULE) + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE) #endif /* _SW64_BPF_JIT_H */ diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index 102de82d69e167d8e4ef2a6519743718befd3ce0..2c238c33e5740dcf4241de568644f7826c2a182b 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -29,46 +29,37 @@ #include "bpf_jit.h" -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) -#define TCALL_CNT (MAX_BPF_JIT_REG + 2) - -/* - * TO-DO List: - * DIV - * MOD - */ +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) static const int bpf2sw64[] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = SW64_BPF_REG_V0, /* arguments from eBPF program to in-kernel function */ - [BPF_REG_1] = SW64_BPF_REG_A1, - [BPF_REG_2] = SW64_BPF_REG_A2, - [BPF_REG_3] = SW64_BPF_REG_A3, - [BPF_REG_4] = SW64_BPF_REG_A4, - [BPF_REG_5] = SW64_BPF_REG_A5, + [BPF_REG_1] = SW64_BPF_REG_A0, + [BPF_REG_2] = SW64_BPF_REG_A1, + [BPF_REG_3] = SW64_BPF_REG_A2, + [BPF_REG_4] = SW64_BPF_REG_A3, + [BPF_REG_5] = SW64_BPF_REG_A4, /* callee saved registers that in-kernel function will preserve */ - [BPF_REG_6] = SW64_BPF_REG_S1, - [BPF_REG_7] = SW64_BPF_REG_S2, - [BPF_REG_8] = SW64_BPF_REG_S3, - [BPF_REG_9] = SW64_BPF_REG_S4, + [BPF_REG_6] = SW64_BPF_REG_S0, + [BPF_REG_7] = SW64_BPF_REG_S1, + [BPF_REG_8] = SW64_BPF_REG_S2, + [BPF_REG_9] = SW64_BPF_REG_S3, /* read-only frame pointer to access stack */ - [BPF_REG_FP] = SW64_BPF_REG_S0, - /* temporary registers for internal BPF JIT */ - [TMP_REG_1] = SW64_BPF_REG_T1, - [TMP_REG_2] = SW64_BPF_REG_T2, + [BPF_REG_FP] = SW64_BPF_REG_FP, /* tail_call_cnt */ - [TCALL_CNT] = SW64_BPF_REG_S5, + [TCALL_CNT] = SW64_BPF_REG_S4, /* temporary register for blinding constants */ - [BPF_REG_AX] = SW64_BPF_REG_T12, + [BPF_REG_AX] = SW64_BPF_REG_T11, }; struct jit_ctx { const struct bpf_prog *prog; int idx; // JITed instruction index + int current_tmp_reg; int epilogue_offset; int *insn_offset; // [bpf_insn_idx] = jited_insn_idx + int exentry_idx; u32 *image; // JITed instruction u32 stack_size; }; @@ -83,7 +74,7 @@ static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra, { opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; - return opcode | ra | disp; + return opcode | ra | (disp & 0x1fffff); } static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, @@ -92,7 +83,17 @@ static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; rb = rb << SW64_BPF_RB_OFFSET; - return opcode | ra | rb | disp; + return opcode | ra | rb | (disp & 0xffff); +} + +static inline u32 sw64_bpf_gen_format_ls_func(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, u16 disp, int function) +{ + opcode = opcode 
<< SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + function = function << SW64_BPF_LS_FUNC_OFFSET; + return opcode | ra | rb | function | (disp & 0xfff); } static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra, @@ -107,12 +108,12 @@ static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_r } static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra, - enum sw64_bpf_registers rc, u8 imm, int function) + u32 imm, enum sw64_bpf_registers rc, int function) { opcode = opcode << SW64_BPF_OPCODE_OFFSET; ra = ra << SW64_BPF_RA_OFFSET; + imm = (imm & 0xff) << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; - imm = imm << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; return opcode | ra | imm | function | rc; } @@ -125,57 +126,85 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx) ctx->idx++; } -static inline void emit_sw64_ldu64(const int dst, const u64 imm64, struct jit_ctx *ctx) +static inline int get_tmp_reg(struct jit_ctx *ctx) { - u16 imm_tmp; - int reg_tmp = SW64_BPF_REG_T8; - - imm_tmp = (imm64 >> 60) & 0xf; - emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); - - imm_tmp = (imm64 >> 45) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - - imm_tmp = (imm64 >> 30) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - - imm_tmp = (imm64 >> 15) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + ctx->current_tmp_reg++; + /* Do not use 22-25. Should be more than enough. 
*/ + if (unlikely(ctx->current_tmp_reg == 8)) { + pr_err("eBPF JIT %s[%d]: not enough temporary registers!\n", + current->comm, current->pid); + return -1; + } + return ctx->current_tmp_reg; +} - imm_tmp = imm64 & 0x7fff; - emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); +static inline void put_tmp_reg(struct jit_ctx *ctx) +{ + ctx->current_tmp_reg--; + if (ctx->current_tmp_reg == 21) + ctx->current_tmp_reg = 7; } -static inline void emit_sw64_ldu32(const int dst, const u32 imm32, struct jit_ctx *ctx) +static void emit_sw64_ldu32(const int dst, const u32 imm, struct jit_ctx *ctx) { u16 imm_tmp; - int reg_tmp = SW64_BPF_REG_T8; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } - imm_tmp = (imm32 >> 30) & 3; + if (imm >= U32_MAX - S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + put_tmp_reg(ctx); + return; + } + + imm_tmp = (imm >> 30) & 3; emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); + if (imm_tmp) + emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); - imm_tmp = (imm32 >> 15) & 0x7fff; - emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); - emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); - emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); - imm_tmp = imm32 & 0x7fff; - emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + put_tmp_reg(ctx); } -static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ctx *ctx) +static void emit_sw64_lds32(const int dst, const s32 imm, struct jit_ctx *ctx) { - s16 hi = imm32 >> 16; - s16 lo = imm32 & 0xffff; - int reg_tmp = SW64_BPF_REG_T8; + s16 hi = imm >> 16; + s16 lo = imm & 0xffff; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx); if (lo & 0x8000) { // sign bit is 1 @@ -183,214 +212,422 @@ static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ct emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx); emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); - emit(SW64_BPF_LDI(dst, dst, lo), ctx); + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); } else { // sign bit is 0 - emit(SW64_BPF_LDI(dst, dst, lo), ctx); + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); } + + put_tmp_reg(ctx); } -/* dst = ra / rb */ -static void emit_sw64_div(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx) { - pr_err("DIV is not supported for now.\n"); + u16 imm_tmp; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= U32_MAX) { + put_tmp_reg(ctx); + return emit_sw64_ldu32(dst, 
(u32)imm, ctx); + } + + if (imm >= (U64_MAX - S16_MAX) || imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + imm_tmp = (imm >> 60) & 0xf; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + if (imm_tmp) + emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); + + imm_tmp = (imm >> 45) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = (imm >> 30) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + + put_tmp_reg(ctx); } -/* dst = ra % rb */ -static void emit_sw64_mod(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +/* Do not change!!! See arch/sw_64/lib/divide.S for more detail */ +#define REG(x) "$"str(x) +#define str(x) #x +#define DIVIDEND 24 +#define DIVISOR 25 +#define RESULT 27 +/* Make these functions noinline because we need their address at runtime */ +noinline void sw64_bpf_jit_helper_div32(void) { - pr_err("MOD is not supported for now.\n"); + register u32 __dividend asm(REG(DIVIDEND)); + register u32 __divisor asm(REG(DIVISOR)); + u32 res = __dividend / __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +noinline void sw64_bpf_jit_helper_mod32(void) +{ + register u32 __dividend asm(REG(DIVIDEND)); + register u32 __divisor asm(REG(DIVISOR)); + u32 res = __dividend % __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +noinline void sw64_bpf_jit_helper_div64(void) +{ + register u64 __dividend asm(REG(DIVIDEND)); + register u64 __divisor asm(REG(DIVISOR)); + u64 res = __dividend / __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +noinline void sw64_bpf_jit_helper_mod64(void) +{ + register u64 __dividend asm(REG(DIVIDEND)); + register u64 __divisor asm(REG(DIVISOR)); + u64 res = __dividend % __divisor; + + asm volatile( + "" + :: "r"(res)); +} + +static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx, u8 code) +{ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, dst, DIVIDEND), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, DIVISOR), ctx); + switch (BPF_CLASS(code)) { + case BPF_ALU: + switch (BPF_OP(code)) { + case BPF_DIV: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div32, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod32, ctx); + break; + } + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_ZAP_IMM(RESULT, 0xf0, dst), ctx); + break; + case BPF_ALU64: + switch (BPF_OP(code)) { + case BPF_DIV: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div64, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod64, ctx); + break; + } + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, RESULT, dst), ctx); + break; + } +} + +#undef REG +#undef str +#undef DIVIDEND +#undef DIVISOR +#undef RESULT + +/* STX XADD: lock *(u32 *)(dst + off) += src */ +static void emit_sw64_xadd32(const int src, int 
dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDW(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDW_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + emit(SW64_BPF_LSTW(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +/* STX XADD: lock *(u64 *)(dst + off) += src */ +static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDL(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDL_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + emit(SW64_BPF_LSTL(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx) { - int tmp = SW64_BPF_REG_T8; + u8 tmp = get_tmp_reg(ctx); - emit(SW64_BPF_LDI(tmp, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0x2, tmp), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); - emit(SW64_BPF_SRL_REG(tmp, 8, tmp), ctx); - emit(SW64_BPF_SLL_REG(dst, 8, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp, dst), ctx); + emit(SW64_BPF_SRL_IMM(tmp, 8, tmp), ctx); + emit(SW64_BPF_SLL_IMM(dst, 8, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp, dst), ctx); + + put_tmp_reg(ctx); } static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx) { - int tmp1 = SW64_BPF_REG_T8; - int tmp2 = SW64_BPF_REG_T9; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x8, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(dst, 0x6, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x4, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(dst, 0x9, dst), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x8, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x4, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp1), ctx); emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); - 
emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 24, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) { - int tmp1 = SW64_BPF_REG_T8; - int tmp2 = SW64_BPF_REG_T9; - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x80, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x81, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 56, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 56, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x40, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x42, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 40, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x4, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x20, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x24, dst), ctx); - emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); - - emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); - emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x8, tmp1), ctx); - emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x10, tmp1), ctx); - emit(SW64_BPF_ZAP_IMM(dst, 0x18, dst), ctx); + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x80, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 56, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x40, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x20, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x10, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x08, tmp1), ctx); emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); - emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x04, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x02, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x01, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 56, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); } static void jit_fill_hole(void *area, unsigned int size) { - memset(area, SW64_BPF_ILLEGAL_INSN, size); + unsigned long c = SW64_BPF_ILLEGAL_INSN; + + c |= c << 32; 
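+	/* replicate the 32-bit illegal instruction into both halves of the fill pattern so the whole hole traps */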
+ __constant_c_memset(area, c, size); +} + +static int offset_to_epilogue(const struct jit_ctx *ctx); +static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) +{ + int from = ctx->insn_offset[bpf_idx + 1]; + int to = ctx->insn_offset[bpf_idx + 1 + off]; + + if (ctx->image == NULL) + return 0; + + return to - from; } static int offset_to_epilogue(const struct jit_ctx *ctx) { + if (ctx->image == NULL) + return 0; + return ctx->epilogue_offset - ctx->idx; } -/* For tail call to jump into */ -#define PROLOGUE_OFFSET 8 +/* For tail call, jump to set up function call stack */ +#define PROLOGUE_OFFSET 11 static void build_prologue(struct jit_ctx *ctx, bool was_classic) { - const int r6 = bpf2sw64[BPF_REG_6]; - const int r7 = bpf2sw64[BPF_REG_7]; - const int r8 = bpf2sw64[BPF_REG_8]; - const int r9 = bpf2sw64[BPF_REG_9]; - const int fp = bpf2sw64[BPF_REG_FP]; - const int tcc = bpf2sw64[TCALL_CNT]; - const int tmp1 = bpf2sw64[TMP_REG_1]; + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; /* Save callee-saved registers */ - emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); - emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 0), ctx); - emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 8), ctx); - emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 16), ctx); - emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 24), ctx); - emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 32), ctx); - emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 40), ctx); - emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); /* Set up BPF prog stack base register */ - emit(SW64_BPF_LDI(fp, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_SP, fp), ctx); if (!was_classic) /* Initialize tail_call_cnt */ - emit(SW64_BPF_LDI(tcc, SW64_BPF_REG_ZR, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, tcc), ctx); /* Set up function call stack */ - ctx->stack_size = ctx->prog->aux->stack_depth; - emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); - emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); + ctx->stack_size = (ctx->prog->aux->stack_depth + 15) & (~15); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -ctx->stack_size), ctx); } static void build_epilogue(struct jit_ctx *ctx) { - const int r6 = bpf2sw64[BPF_REG_6]; - const int r7 = bpf2sw64[BPF_REG_7]; - const int r8 = bpf2sw64[BPF_REG_8]; - const int r9 = bpf2sw64[BPF_REG_9]; - const int fp = bpf2sw64[BPF_REG_FP]; - const int tcc = bpf2sw64[TCALL_CNT]; - const int tmp1 = bpf2sw64[TMP_REG_1]; + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; /* Destroy function call stack */ - emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); + 
emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); /* Restore callee-saved registers */ - emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 0), ctx); - emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 8), ctx); - emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 16), ctx); - emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 24), ctx); - emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 32), ctx); - emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 40), ctx); - emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 64), ctx); /* Return */ emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); } -static int out_offset = -1; /* initialized on the first pass of build_body() */ static int emit_bpf_tail_call(struct jit_ctx *ctx) { - /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + /* bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) */ const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ - const u8 r3 = bpf2sw64[BPF_REG_3]; /* u64 index */ + const u8 r3 = bpf2sw64[BPF_REG_3]; /* u32 index */ - const u8 tmp = bpf2sw64[TMP_REG_1]; - const u8 prg = bpf2sw64[TMP_REG_2]; + const u8 tmp = get_tmp_reg(ctx); + const u8 prg = get_tmp_reg(ctx); const u8 tcc = bpf2sw64[TCALL_CNT]; - const int idx0 = ctx->idx; -#define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) u64 offset; + static int out_idx; +#define out_offset (ctx->image ? 
(out_idx - ctx->idx - 1) : 0) /* if (index >= array->map.max_entries) * goto out; */ offset = offsetof(struct bpf_array, map.max_entries); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ - emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* map.max_entries is u32 */ - emit(SW64_BPF_SUBL_REG(r3, tmp, tmp), ctx); /* tmp = r3 - tmp = index - map.max_entries */ - emit(SW64_BPF_BGE(tmp, jmp_offset), ctx); + emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx); /* map.max_entries is u32 */ + emit(SW64_BPF_ZAP_IMM(r3, 0xf0, r3), ctx); /* index is u32 */ + emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ - emit(SW64_BPF_LDI(tmp, SW64_BPF_REG_ZR, MAX_TAIL_CALL_CNT), ctx); - emit(SW64_BPF_SUBL_REG(tcc, tmp, tmp), ctx); - emit(SW64_BPF_BGT(tmp, jmp_offset), ctx); + emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(SW64_BPF_CMPULT_REG(tmp, tcc, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); /* prog = array->ptrs[index]; @@ -398,34 +635,66 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * goto out; */ offset = offsetof(struct bpf_array, ptrs); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset of ptrs */ - emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs */ - emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, ptrs is 8 bit aligned */ - emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &prog */ - emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = prog */ - emit(SW64_BPF_BEQ(prg, jmp_offset), ctx); + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs[0] */ + emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, each entry is a pointer */ + emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &ptrs[index] */ + emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = ptrs[index] = prog */ + emit(SW64_BPF_BEQ(prg, out_offset), ctx); /* goto *(prog->bpf_func + prologue_offset); */ offset = offsetof(struct bpf_prog, bpf_func); - emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ + emit_sw64_ldu64(tmp, offset, ctx); emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ - emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ - emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* bpf_func is unsigned int */ - emit(SW64_BPF_ADDL_REG(tmp, sizeof(u32) * PROLOGUE_OFFSET, tmp), ctx); - emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, ctx->stack_size, SW64_BPF_REG_SP), ctx); - emit(SW64_BPF_BR(tmp, 0), ctx); + emit(SW64_BPF_LDL(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ + emit(SW64_BPF_BEQ(tmp, out_offset), ctx); + emit(SW64_BPF_LDI(tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); + emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); /* out */ - if (out_offset == -1) - out_offset = cur_offset; - if (cur_offset != out_offset) { - pr_err("tail_call out_offset = %d, expected %d!\n", - cur_offset, out_offset); + if (ctx->image == NULL) + out_idx = ctx->idx; + if (ctx->image != NULL && out_idx <= 0) return -1; - } 
+#undef out_offset + return 0; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + + if (!ctx->image) + /* First pass */ + return 0; + + if (!ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->exentry_idx]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = (long)&ex->insn - pc; + ex->insn = offset; + + ex->fixup.bits.nextinsn = sizeof(u32); + ex->fixup.bits.valreg = dst_reg; + ex->fixup.bits.errreg = SW64_BPF_REG_ZR; + + ctx->exentry_idx++; return 0; -#undef cur_offset -#undef jmp_offset } /* JITs an eBPF instruction. @@ -434,80 +703,110 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * >0 - successfully JITed a 16-byte eBPF instruction. * <0 - failed to JIT. */ -static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; - const u8 dst = bpf2sw64[insn->dst_reg]; - const u8 src = bpf2sw64[insn->src_reg]; - const u8 tmp1 = bpf2sw64[TMP_REG_1]; - const u8 tmp2 = bpf2sw64[TMP_REG_2]; + u8 dst = bpf2sw64[insn->dst_reg]; + u8 src = bpf2sw64[insn->src_reg]; + const u8 tmp1 __maybe_unused = get_tmp_reg(ctx); + const u8 tmp2 __maybe_unused = get_tmp_reg(ctx); const s16 off = insn->off; const s32 imm = insn->imm; - int jmp_offset; + const int bpf_idx = insn - ctx->prog->insnsi; + s32 jmp_offset; u64 func; struct bpf_insn insn1; u64 imm64; + int ret; switch (code) { case BPF_ALU | BPF_MOV | BPF_X: + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MOV | BPF_X: - emit(SW64_BPF_LDI(dst, src, 0), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); break; case BPF_ALU | BPF_ADD | BPF_X: emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_ADD | BPF_X: emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_SUB | BPF_X: emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_SUB | BPF_X: emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_MUL | BPF_X: emit(SW64_BPF_MULW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); break; case BPF_ALU64 | BPF_MUL | BPF_X: emit(SW64_BPF_MULL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_DIV | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU64 | BPF_DIV | BPF_X: - emit_sw64_div(dst, src, dst, ctx); - return -EINVAL; + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU | BPF_MOD | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU64 | BPF_MOD | BPF_X: - emit_sw64_mod(dst, src, dst, ctx); - return -EINVAL; + emit_sw64_divmod(dst, src, ctx, code); + break; case BPF_ALU | BPF_LSH | BPF_X: + emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_LSH | BPF_X: emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_RSH | BPF_X: - emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); case BPF_ALU64 | BPF_RSH | BPF_X: 
emit(SW64_BPF_SRL_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ARSH | BPF_X: emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_AND | BPF_X: + emit(SW64_BPF_AND_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_AND | BPF_X: emit(SW64_BPF_AND_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_OR | BPF_X: + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_OR | BPF_X: - emit(SW64_BPF_OR_REG(dst, src, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_XOR | BPF_X: + emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_XOR | BPF_X: emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); break; case BPF_ALU | BPF_NEG: + emit(SW64_BPF_SUBW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_NEG: - emit(SW64_BPF_SEXTB_IMM(0xff, tmp1), ctx); - emit(SW64_BPF_XOR_IMM(dst, tmp1, dst), ctx); + emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_ZR, dst, dst), ctx); break; case BPF_ALU | BPF_END | BPF_TO_LE: switch (imm) { @@ -519,7 +818,12 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case 64: break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_LE unknown size\n", + current->comm, current->pid); + return -EINVAL; } + break; case BPF_ALU | BPF_END | BPF_TO_BE: switch (imm) { case 16: @@ -531,73 +835,223 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case 64: emit_sw64_htobe64(dst, ctx); break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown size\n", + current->comm, current->pid); + return -EINVAL; } + break; case BPF_ALU | BPF_MOV | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_ldu32(dst, imm, ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MOV | BPF_K: - emit_sw64_lds32(dst, imm, ctx); + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_lds32(dst, imm, ctx); break; case BPF_ALU | BPF_ADD | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_ADDW_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ADD | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_SUB | BPF_K: + if (imm >= -S16_MAX && imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_SUB | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + if (imm >= -S16_MAX && imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, 
tmp1, dst), ctx); + } break; case BPF_ALU | BPF_MUL | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_MUL | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_DIV | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU64 | BPF_DIV | BPF_K: emit_sw64_lds32(tmp1, imm, ctx); - emit_sw64_div(dst, src, tmp1, ctx); - return -EINVAL; + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU | BPF_MOD | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU64 | BPF_MOD | BPF_K: emit_sw64_lds32(tmp1, imm, ctx); - emit_sw64_mod(dst, src, tmp1, ctx); - return -EINVAL; + emit_sw64_divmod(dst, tmp1, ctx, code); + break; case BPF_ALU | BPF_LSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_LSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_RSH | BPF_K: - emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } + break; case BPF_ALU64 | BPF_RSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_ARSH | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_ARSH | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_AND | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_AND | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + 
} break; case BPF_ALU | BPF_OR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_OR | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_OR_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } break; case BPF_ALU | BPF_XOR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; case BPF_ALU64 | BPF_XOR | BPF_K: - emit_sw64_lds32(tmp1, imm, ctx); - emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } break; case BPF_JMP | BPF_JA: - emit(SW64_BPF_BR(SW64_BPF_REG_RA, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, src, tmp1), ctx); + src = tmp1; + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JLT | BPF_X: @@ -645,9 +1099,29 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx); break; } - emit(SW64_BPF_BLBS(tmp1, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp1, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JLT | BPF_K: @@ -662,47 +1136,57 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_sw64_lds32(tmp1, imm, ctx); switch (BPF_OP(code)) { case BPF_JEQ: - emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); break; case BPF_JGT: - 
emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp2), ctx); break; case BPF_JLT: - emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp2), ctx); break; case BPF_JGE: - emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp2), ctx); break; case BPF_JLE: - emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp2), ctx); break; case BPF_JNE: - emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); - emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); + emit(SW64_BPF_XOR_IMM(tmp2, 1, tmp2), ctx); break; case BPF_JSGT: - emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp2), ctx); break; case BPF_JSLT: - emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp2), ctx); break; case BPF_JSGE: - emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp1), ctx); + emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp2), ctx); break; case BPF_JSLE: - emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp2), ctx); break; case BPF_JSET: - emit(SW64_BPF_AND_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, tmp2), ctx); break; } - emit(SW64_BPF_BLBS(tmp1, off), ctx); + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp2, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_JMP | BPF_CALL: func = (u64)__bpf_call_base + imm; - emit_sw64_ldu64(tmp1, func, ctx); - emit(SW64_BPF_CALL(SW64_BPF_REG_RA, tmp1), ctx); + if ((func & 0xffffffffe0000000UL) != 0xffffffff80000000UL) + /* calling bpf program, switch to vmalloc addr */ + func = (func & 0xffffffff) | 0xfffff00000000000UL; + emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); break; case BPF_JMP | BPF_TAIL_CALL: @@ -711,38 +1195,60 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_JMP | BPF_EXIT: - if (insn - ctx->prog->insnsi + 1 == ctx->prog->len) + // if this is the last instruction, fallthrough to epilogue + if (bpf_idx == ctx->prog->len - 1) break; - jmp_offset = (offset_to_epilogue(ctx) - 1) * 4; - // emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); - // break; - emit_sw64_lds32(tmp1, jmp_offset, ctx); - emit(SW64_BPF_BR(tmp2, 0), ctx); - emit(SW64_BPF_ADDL_REG(tmp1, tmp2, tmp1), ctx); - emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp1), ctx); + jmp_offset = offset_to_epilogue(ctx) - 1; + // epilogue is always at the end, must jump forward + if (jmp_offset >= -1 && jmp_offset <= 0xfffff) { + if (ctx->image && !jmp_offset) + // if this is the last instruction, fallthrough to epilogue + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + else + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_EXIT out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } break; case BPF_LD | BPF_IMM | BPF_DW: insn1 = insn[1]; - imm64 = (u64)insn1.imm << 32 | (u32)imm; + imm64 = ((u64)insn1.imm << 32) | (u32)imm; emit_sw64_ldu64(dst, imm64, ctx); - + put_tmp_reg(ctx); + put_tmp_reg(ctx); return 1; /* LDX: dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - emit(SW64_BPF_LDW(dst, 
src, off), ctx); - break; case BPF_LDX | BPF_MEM | BPF_H: - emit(SW64_BPF_LDHU(dst, src, off), ctx); - emit(SW64_BPF_SEXTH_REG(dst, dst), ctx); - break; case BPF_LDX | BPF_MEM | BPF_B: - emit(SW64_BPF_LDBU(dst, src, off), ctx); - emit(SW64_BPF_SEXTB_REG(dst, dst), ctx); - break; case BPF_LDX | BPF_MEM | BPF_DW: - emit(SW64_BPF_LDW(dst, src, off), ctx); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + switch (BPF_SIZE(code)) { + case BPF_W: + emit(SW64_BPF_LDW(dst, src, off), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_H: + emit(SW64_BPF_LDHU(dst, src, off), ctx); + break; + case BPF_B: + emit(SW64_BPF_LDBU(dst, src, off), ctx); + break; + case BPF_DW: + emit(SW64_BPF_LDL(dst, src, off), ctx); + break; + } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* ST: *(size *)(dst + off) = imm */ @@ -773,33 +1279,32 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(SW64_BPF_STW(src, dst, off), ctx); break; case BPF_STX | BPF_MEM | BPF_H: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STH(src, dst, off), ctx); break; case BPF_STX | BPF_MEM | BPF_B: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STB(src, dst, off), ctx); break; case BPF_STX | BPF_MEM | BPF_DW: - emit(SW64_BPF_STW(src, dst, off), ctx); + emit(SW64_BPF_STL(src, dst, off), ctx); break; /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: - emit(SW64_BPF_LDW(tmp1, dst, off), ctx); - emit(SW64_BPF_ADDW_REG(tmp1, src, tmp1), ctx); - emit(SW64_BPF_STW(tmp1, dst, off), ctx); + emit_sw64_xadd32(src, dst, off, ctx); break; /* STX XADD: lock *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - emit(SW64_BPF_LDL(tmp1, dst, off), ctx); - emit(SW64_BPF_ADDL_REG(tmp1, src, tmp1), ctx); - emit(SW64_BPF_STL(tmp1, dst, off), ctx); + emit_sw64_xadd64(src, dst, off, ctx); break; default: - pr_err("unknown opcode %02x\n", code); + pr_err("eBPF JIT %s[%d]: unknown opcode 0x%02x\n", + current->comm, current->pid, code); return -EINVAL; } + put_tmp_reg(ctx); + put_tmp_reg(ctx); return 0; } @@ -812,17 +1317,17 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; ret = build_insn(insn, ctx); - if (ret > 0) { + if (ret < 0) + return ret; + while (ret > 0) { i++; if (ctx->image == NULL) - ctx->insn_offset[i] = ctx->idx; - continue; + ctx->insn_offset[i] = ctx->insn_offset[i - 1]; + ret--; } - if (ctx->image == NULL) - ctx->insn_offset[i] = ctx->idx; - if (ret) - return ret; } return 0; @@ -837,6 +1342,9 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) + return -1; + return 0; } @@ -854,7 +1362,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - int image_size; + int image_size, prog_size, extable_size; u8 *image_ptr; if (!prog->jit_requested) @@ -885,13 +1393,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.insn_offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + 
ctx.insn_offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.insn_offset == NULL) { prog = orig_prog; goto out_off; @@ -907,11 +1415,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - ctx.epilogue_offset = ctx.idx; + ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + /* Now we know the actual image size. */ - image_size = sizeof(u32) * ctx.idx; + /* And we need extra 8 bytes for lock instructions alignment */ + prog_size = sizeof(u32) * ctx.idx + 8; + image_size = prog_size + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -921,9 +1434,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. Now, the actual pass. */ - ctx.image = (u32 *)image_ptr; + /* lock instructions need 8-byte alignment */ + ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7)); + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; + ctx.exentry_idx = 0; build_prologue(&ctx, was_classic); @@ -944,7 +1461,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* And we're done. */ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); bpf_flush_icache(header, ctx.image + ctx.idx); @@ -957,7 +1474,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } prog->bpf_func = (void *)ctx.image; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; + if (ctx.current_tmp_reg) { + pr_err("eBPF JIT %s[%d]: unreleased temporary registers %d\n", + current->comm, current->pid, ctx.current_tmp_reg); + } if (!prog->is_func || extra_pass) { out_off: diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c index 5f49b5175d34f634d13e71c18c87a6e142186359..819d8f1437e284e4da4bd381527f895916426843 100644 --- a/drivers/cpufreq/sw64_cpufreq.c +++ b/drivers/cpufreq/sw64_cpufreq.c @@ -40,10 +40,8 @@ static int sw64_cpu_freq_notifier(struct notifier_block *nb, unsigned long cpu; for_each_online_cpu(cpu) { - if (val == CPUFREQ_POSTCHANGE) { + if (val == CPUFREQ_POSTCHANGE) sw64_update_clockevents(cpu, freqs->new * 1000); - current_cpu_data.loops_per_jiffy = loops_per_jiffy; - } } return 0; @@ -59,7 +57,7 @@ static unsigned int sw64_cpufreq_get(unsigned int cpu) return 0; } - return sw64_clk_get_rate(policy->clk); + return __sw64_cpufreq_get(policy) * 1000; } /* @@ -70,12 +68,12 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy, { unsigned long freq; - freq = (get_cpu_freq() / 1000) * index / 48; + freq = 50000 * index; sw64_store_policy(policy); /* setting the cpu frequency */ - sw64_set_rate(-1, freq * 1000); + sw64_set_rate(freq * 1000); return 0; } @@ -100,7 +98,7 @@ static int sw64_cpufreq_cpu_init(struct cpufreq_policy *policy) if (sw64_clockmod_table[i].frequency == 0) sw64_clockmod_table[i].frequency = (rate * i) / 48; - sw64_set_rate(-1, rate * 1000); + sw64_set_rate(rate * 1000); policy->clk = cpuclk; diff --git a/drivers/firmware/efi/sunway-init.c b/drivers/firmware/efi/sunway-init.c index 9871508df58c7f89caa401e034cf0790341b7bed..b130218634fbd52ce9e3a6f85723ee58098ed1d6 100644 --- a/drivers/firmware/efi/sunway-init.c +++ b/drivers/firmware/efi/sunway-init.c @@ -25,8 +25,6 @@ #include -extern bool __virt_addr_valid(unsigned long x); - static int __init is_memory(efi_memory_desc_t *md)
{ if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC)) @@ -128,23 +126,7 @@ static __init int is_usable_memory(efi_memory_desc_t *md) } return false; } -static __initdata char memory_type_name1[][20] = { - "Reserved", - "Loader Code", - "Loader Data", - "Boot Code", - "Boot Data", - "Runtime Code", - "Runtime Data", - "Conventional Memory", - "Unusable Memory", - "ACPI Reclaim Memory", - "ACPI Memory NVS", - "Memory Mapped I/O", - "MMIO Port Space", - "PAL Code", - "Persistent Memory", -}; + static __init void reserve_regions(void) { efi_memory_desc_t *md; @@ -157,22 +139,6 @@ static __init void reserve_regions(void) paddr = md->phys_addr; npages = md->num_pages; - if (!__virt_addr_valid(paddr)) - continue; - - if (md->type >= ARRAY_SIZE(memory_type_name1)) - continue; - - if (md->attribute & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | - EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | - EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | - EFI_MEMORY_NV | - EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) - continue; - - if (strncmp(memory_type_name1[md->type], "Reserved", 8) == 0) - continue; - if (efi_enabled(EFI_DBG)) { char buf[64]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 04eaf3a8fddba0ec118d5249af0d84d634e5006a..946f25f1079f3f3589ae62bc20eee52d905a17f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2816,7 +2816,11 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) } /* clear memory. Not sure if this is required or not */ +#if IS_ENABLED(CONFIG_SW64) + memset_io(hpd, 0, mec_hpd_size); +#else memset(hpd, 0, mec_hpd_size); +#endif amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -2926,7 +2930,11 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, u64 wb_gpu_addr; /* init the mqd struct */ +#if IS_ENABLED(CONFIG_SW64) + memset_io(mqd, 0, sizeof(struct cik_mqd)); +#else memset(mqd, 0, sizeof(struct cik_mqd)); +#endif mqd->header = 0xC0310800; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 28c4e1fe5cd4cc2d401732917ee8a2c650539bfb..0ac2c33a0667697dd43c91b51b7d94c79c61d3fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4641,8 +4641,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#endif + } /* reset ring buffer */ ring->wptr = 0; @@ -4667,12 +4672,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { #if IS_ENABLED(CONFIG_SW64) memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); #else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); #endif + } } return 0; @@ -4685,7 +4691,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 
+#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#endif ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -4694,12 +4704,23 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#endif + } } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); +#endif + } + /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c621ebd9003101c0fc0fdc44e2d236c730e32346..c8d1245bfc2b310cb158cf240d266ad0c90a195d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1978,7 +1978,11 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) return r; } +#if IS_ENABLED(CONFIG_SW64) + memset_io(hpd, 0, mec_hpd_size); +#else memset(hpd, 0, mec_hpd_size); +#endif amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -3724,10 +3728,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) { - if (IS_ENABLED(CONFIG_SW64)) - memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - else - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#else + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#endif } /* reset ring buffer */ @@ -3740,7 +3745,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#endif ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -3751,10 +3760,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) { - if (IS_ENABLED(CONFIG_SW64)) - memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - else - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#else + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct 
v9_mqd_allocation)); +#endif } } @@ -3768,7 +3778,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); +#endif ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -3778,11 +3792,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); +#endif + } } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) + if (adev->gfx.mec.mqd_backup[mqd_idx]) { +#if IS_ENABLED(CONFIG_SW64) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#else memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); +#endif + } /* reset ring buffer */ ring->wptr = 0; diff --git a/drivers/iommu/sw64/sunway_iommu.c b/drivers/iommu/sw64/sunway_iommu.c index b6c8f1272d28fc9de9575fa9d245eb8434396704..580619c6a571e8944130c622fed8e13b17cc78a1 100644 --- a/drivers/iommu/sw64/sunway_iommu.c +++ b/drivers/iommu/sw64/sunway_iommu.c @@ -1382,7 +1382,7 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned type) sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - sdomain->domain.geometry.aperture_start = SW64_DMA_START; + sdomain->domain.geometry.aperture_start = 0ULL; sdomain->domain.geometry.aperture_end = (~0ULL); sdomain->domain.geometry.force_aperture = true; sdomain->type = IOMMU_DOMAIN_UNMANAGED;