diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index ad13ec55ddfe5110ab8922a5aafe1732951209de..9ca5a45c2140a9f3dfc1dc58b28a871f4c3b844f 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -14,6 +14,7 @@ KVM s390/index ppc-pv x86/index + loongarch/index locking vcpu-requests diff --git a/Documentation/virt/kvm/loongarch/hypercalls.rst b/Documentation/virt/kvm/loongarch/hypercalls.rst new file mode 100644 index 0000000000000000000000000000000000000000..1679e48d67d28c4ee30cdc9120d74ae2f54cda57 --- /dev/null +++ b/Documentation/virt/kvm/loongarch/hypercalls.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +The LoongArch paravirtual interface +=================================== + +KVM hypercalls use the HVCL instruction with code 0x100, and the hypercall +number is put in a0 and up to five arguments may be placed in a1-a5, the +return value is placed in v0 (alias with a0). + +The code for that interface can be found in arch/loongarch/kvm/* + +Querying for existence +====================== + +To find out if we're running on KVM or not, cpucfg can be used with index +CPUCFG_KVM_BASE (0x40000000), cpucfg range between 0x40000000 - 0x400000FF +is marked as a specially reserved range. All existing and future processors +will not implement any features in this range. + +When Linux is running on KVM, cpucfg with index CPUCFG_KVM_BASE (0x40000000) +returns magic string "KVM\0" + +Once you determined you're running under a PV capable KVM, you can now use +hypercalls as described below. + +KVM hypercall ABI +================= + +Hypercall ABI on KVM is simple, only one scratch register a0 (v0) and at most +five generic registers used as input parameter. FP register and vector register +is not used for input register and should not be modified during hypercall. +Hypercall function can be inlined since there is only one scratch register. + +The parameters are as follows: + + ======== ================ ================ + Register IN OUT + ======== ================ ================ + a0 function number Return code + a1 1st parameter - + a2 2nd parameter - + a3 3rd parameter - + a4 4th parameter - + a5 5th parameter - + ======== ================ ================ + +Return codes can be as follows: + + ==== ========================= + Code Meaning + ==== ========================= + 0 Success + -1 Hypercall not implemented + -2 Hypercall parameter error + ==== ========================= + +KVM Hypercalls Documentation +============================ + +The template for each hypercall is: +1. Hypercall name +2. Purpose + +1. KVM_HCALL_FUNC_PV_IPI +------------------------ + +:Purpose: Send IPIs to multiple vCPUs. + +- a0: KVM_HCALL_FUNC_PV_IPI +- a1: lower part of the bitmap of destination physical CPUIDs +- a2: higher part of the bitmap of destination physical CPUIDs +- a3: the lowest physical CPUID in bitmap + +The hypercall lets a guest send multicast IPIs, with at most 128 +destinations per hypercall. The destinations are represented by a bitmap +contained in the first two arguments (a1 and a2). Bit 0 of a1 corresponds +to the physical CPUID in the third argument (a3), bit 1 corresponds to the +physical ID a3+1, and so on. diff --git a/Documentation/virt/kvm/loongarch/index.rst b/Documentation/virt/kvm/loongarch/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..83387b4c53455033acaac2ddceb404d898ccaa39 --- /dev/null +++ b/Documentation/virt/kvm/loongarch/index.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +KVM for LoongArch systems +========================= + +.. toctree:: + :maxdepth: 2 + + hypercalls.rst diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bad326ae58f29c7e4492b88eb328b8ccbffc0848..7b82992af3c4ceb6a9393f861b872d3f64152636 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -565,6 +565,15 @@ config CPU_HAS_PREFETCH bool default y +config PARAVIRT + bool "Enable paravirtualization code" + depends on AS_HAS_LVZ_EXTENSION + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + config ARCH_SUPPORTS_KEXEC def_bool y diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 93783fa24f6e9b634be44a3634346524eaec3811..22991a6f0e2b5500890f49b5c1d51848163b58c2 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -23,4 +23,3 @@ generic-y += poll.h generic-y += param.h generic-y += posix_types.h generic-y += resource.h -generic-y += kvm_para.h diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index 0ef3b18f89803708d6e8a96b37c9ff037cc2e264..b26d596a73aa249daff90e2603259a22440001d5 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -12,11 +12,16 @@ extern void ack_bad_irq(unsigned int irq); #define ack_bad_irq ack_bad_irq +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNCTION, +}; #define NR_IPI 2 typedef struct { unsigned int ipi_irqs[NR_IPI]; unsigned int __softirq_pending; + atomic_t message ____cacheline_aligned_in_smp; } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 008a88ead60d9a55a4a8ffb0640b53bff5a127ba..e4c545fecaeab1a7aa38466be97e39ca075f4e30 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -12,6 +12,7 @@ #define INSN_NOP 0x03400000 #define INSN_BREAK 0x002a0000 +#define INSN_HVCL 0x002b8000 #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 @@ -65,6 +66,7 @@ enum reg2_op { revbd_op = 0x0f, revh2w_op = 0x10, revhd_op = 0x11, + cpucfg_op = 0x1b, iocsrrdb_op = 0x19200, iocsrrdh_op = 0x19201, iocsrrdw_op = 0x19202, diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index ed8e72db0dbac2dadf2447e299fc8ebcfd7b0442..85a3315597b6b34a72752f97cda525bade0b18ed 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -118,9 +118,18 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); extern void fixup_irqs(void); +static inline int get_percpu_irq(int vector) +{ + struct irq_domain *d; + + d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); + if (d) + return irq_create_mapping(d, vector); + + return -EINVAL; +} #include #endif /* _ASM_IRQ_H */ diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h index 724ca8b7b4011b3277103aace069c8b4981114a0..476c9f620dd52b64a9fb4b98757bf08da5173f78 100644 --- a/arch/loongarch/include/asm/kvm_csr.h +++ b/arch/loongarch/include/asm/kvm_csr.h @@ -208,4 +208,9 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, csr->csrs[gid] |= val & _mask; } +#define KVM_PMU_PLV_ENABLE (CSR_PERFCTRL_PLV0 | \ + CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | \ + CSR_PERFCTRL_PLV3) + #endif /* __ASM_LOONGARCH_KVM_CSR_H__ */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 5bdb34b2c5d677099aa02b8800060864aecea72a..c146d2ebdb90128fa737d441fa5fedadf04fddf0 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -30,7 +30,10 @@ #define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; @@ -43,6 +46,7 @@ struct kvm_vcpu_stat { u64 idle_exits; u64 cpucfg_exits; u64 signal_exits; + u64 hypercall_exits; }; #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0) @@ -64,6 +68,30 @@ struct kvm_world_switch { #define MAX_PGTABLE_LEVELS 4 +/* + * Physical cpu id is used for interrupt routing, there are different + * definitions about physical cpuid on different hardwares. + * For LOONGARCH_CSR_CPUID register, max cpuid size if 512 + * For IPI HW, max dest CPUID size 1024 + * For extioi interrupt controller, max dest CPUID size is 256 + * For MSI interrupt controller, max supported CPUID size is 65536 + * + * Currently max CPUID is defined as 256 for KVM hypervisor, in future + * it will be expanded to 4096, including 16 packages at most. And every + * package supports at most 256 vcpus + */ +#define KVM_MAX_PHYID 256 + +struct kvm_phyid_info { + struct kvm_vcpu *vcpu; + bool enabled; +}; + +struct kvm_phyid_map { + int max_phyid; + struct kvm_phyid_info phys_map[KVM_MAX_PHYID]; +}; + struct kvm_arch { /* Guest physical mm */ kvm_pte_t *pgd; @@ -71,6 +99,8 @@ struct kvm_arch { unsigned long invalid_ptes[MAX_PGTABLE_LEVELS]; unsigned int pte_shifts[MAX_PGTABLE_LEVELS]; unsigned int root_level; + spinlock_t phyid_map_lock; + struct kvm_phyid_map *phyid_map; s64 time_offset; struct kvm_context __percpu *vmcs; @@ -99,6 +129,7 @@ enum emulation_result { #define KVM_LARCH_LASX (0x1 << 2) #define KVM_LARCH_SWCSR_LATEST (0x1 << 3) #define KVM_LARCH_HWCSR_USABLE (0x1 << 4) +#define KVM_LARCH_PERF (0x1 << 5) struct kvm_vcpu_arch { /* @@ -168,6 +199,15 @@ struct kvm_vcpu_arch { struct kvm_mp_state mp_state; /* cpucfg */ u32 cpucfg[KVM_MAX_CPUCFG_REGS]; + /* paravirt steal time */ + struct { + u64 guest_addr; + u64 last_steal; + struct gfn_to_hva_cache cache; + } st; + /* Save host pmu csr */ + u64 perf_ctrl[4]; + u64 perf_cntr[4]; }; static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg) @@ -195,6 +235,16 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) return arch->cpucfg[2] & CPUCFG2_LASX; } +static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[6] & CPUCFG6_PMP; +} + +static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) +{ + return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h new file mode 100644 index 0000000000000000000000000000000000000000..032101b941d92fea374afbb18877ed67314007dc --- /dev/null +++ b/arch/loongarch/include/asm/kvm_para.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_KVM_PARA_H +#define _ASM_LOONGARCH_KVM_PARA_H + +/* + * Hypercall code field + */ +#define HYPERVISOR_KVM 1 +#define HYPERVISOR_VENDOR_SHIFT 8 +#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) +#define KVM_HCALL_CODE_PV_SERVICE 0 +#define KVM_HCALL_CODE_SWDBG 1 +#define KVM_HCALL_PV_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE) +#define KVM_HCALL_FUNC_PV_IPI 1 +#define KVM_HCALL_FUNC_NOTIFY 2 +#define KVM_HCALL_SWDBG HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) + +/* + * LoongArch hypercall return code + */ +#define KVM_HCALL_STATUS_SUCCESS 0 +#define KVM_HCALL_INVALID_CODE -1UL +#define KVM_HCALL_INVALID_PARAMETER -2UL + +#define KVM_STEAL_PHYS_VALID BIT_ULL(0) +#define KVM_STEAL_PHYS_MASK GENMASK_ULL(63, 6) +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + +/* + * Hypercall interface for KVM hypervisor + * + * a0: function identifier + * a1-a6: args + * Return value will be placed in v0. + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + */ +static __always_inline long kvm_hypercall(u64 fid) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r" (fun) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall2(u64 fid, + unsigned long arg0, unsigned long arg1) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall3(u64 fid, + unsigned long arg0, unsigned long arg1, unsigned long arg2) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2), "r" (a3) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall4(u64 fid, + unsigned long arg0, unsigned long arg1, unsigned long arg2, + unsigned long arg3) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + register unsigned long a4 asm("a4") = arg3; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall5(u64 fid, + unsigned long arg0, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4) +{ + register long ret asm("v0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + register unsigned long a4 asm("a4") = arg3; + register unsigned long a5 asm("a5") = arg4; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + : "=r" (ret) + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5) + : "memory" + ); + + return ret; +} + + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} +#endif /* _ASM_LOONGARCH_KVM_PARA_H */ diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 0cb4fdb8a9b5970dfefb24a34c82d1451ff27fa9..1da24994b838a9acbc905e5130cba451261651d2 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -75,12 +75,15 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif +int kvm_own_pmu(struct kvm_vcpu *vcpu); + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid); /* * Loongarch KVM guest interrupt handling diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 67be9804903802f08d0ff46e02179c903c1c7920..e852c0f62eb7096c08e008fbeecf390219fe1f3f 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -119,6 +119,7 @@ #define CPUCFG6_PMP BIT(0) #define CPUCFG6_PAMVER GENMASK(3, 1) #define CPUCFG6_PMNUM GENMASK(7, 4) +#define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) #define CPUCFG6_UPM BIT(14) @@ -158,6 +159,18 @@ #define CPUCFG48_VFPU_CG BIT(2) #define CPUCFG48_RAM_CG BIT(3) +/* + * cpucfg index area: 0x40000000 -- 0x400000ff + * SW emulation for KVM hypervirsor + */ +#define CPUCFG_KVM_BASE 0x40000000UL +#define CPUCFG_KVM_SIZE 0x100 +#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE +#define KVM_SIGNATURE "KVM\0" +#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) +#define KVM_FEATURE_PV_IPI BIT(1) +#define KVM_FEATURE_STEAL_TIME BIT(2) + #ifndef __ASSEMBLY__ /* CSR */ diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..fe27fb5e82b88fa3546b56441ae2cbc0b590350f --- /dev/null +++ b/arch/loongarch/include/asm/paravirt.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_PARAVIRT_H +#define _ASM_LOONGARCH_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +#include +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 dummy_steal_clock(int cpu); +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +int pv_ipi_init(void); +int __init pv_time_init(void); +#else +static inline int pv_ipi_init(void) +{ + return 0; +} + +static inline int pv_time_init(void) +{ + return 0; +} +#endif // CONFIG_PARAVIRT +#endif diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h new file mode 100644 index 0000000000000000000000000000000000000000..8a418f0b4fd537164e5f5fa5a0bdbc5886fcee1f --- /dev/null +++ b/arch/loongarch/include/asm/paravirt_api_clock.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ +#ifndef _ASM_API_CLOCK_H +#define _ASM_API_CLOCK_H + +#include + +#endif diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index f81e5f01d61905f5b8d7da4786ba512258381acd..75d30529748c944748f2d70f8389c7fee509f69c 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -12,6 +12,13 @@ #include #include +struct smp_ops { + void (*init_ipi)(void); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); + void (*send_ipi_single)(int cpu, unsigned int action); +}; + +extern struct smp_ops smp_ops; extern int smp_num_siblings; extern int num_processors; extern int disabled_cpus; @@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus); void loongson_boot_secondary(int cpu, struct task_struct *idle); void loongson_init_secondary(void); void loongson_smp_finish(void); -void loongson_send_ipi_single(int cpu, unsigned int action); -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); @@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS]; #define cpu_physical_id(cpu) cpu_logical_map(cpu) -#define SMP_BOOT_CPU 0x1 -#define SMP_RESCHEDULE 0x2 -#define SMP_CALL_FUNCTION 0x4 +#define ACTION_BOOT_CPU 0 +#define ACTION_RESCHEDULE 1 +#define ACTION_CALL_FUNCTION 2 +#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU) +#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE) +#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION) struct secondary_data { unsigned long stack; @@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data; extern asmlinkage void smpboot_entry(void); extern asmlinkage void start_secondary(void); - +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void calculate_cpu_foreign_map(void); /* @@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void); */ extern void show_ipi_list(struct seq_file *p, int prec); -static inline void arch_send_call_function_single_ipi(int cpu) -{ - loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); -} - -static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) -{ - loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); -} - #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild deleted file mode 100644 index 4aa680ca2e5fdf6407f8692264599da101d46ab3..0000000000000000000000000000000000000000 --- a/arch/loongarch/include/uapi/asm/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -generic-y += kvm_para.h diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 923d0bd382941acc5794d7622f7adfe1b2533422..9891ed93816a3af2cf08713602ccd8d5b8e35556 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -15,10 +15,12 @@ */ #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_GUEST_DEBUG #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 /* * for KVM_GET_REGS and KVM_SET_REGS */ @@ -74,12 +76,18 @@ struct kvm_fpu { #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) #define LOONGARCH_REG_SHIFT 3 #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) + +/* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 +#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 struct kvm_debug_exit_arch { }; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 10ee5fc7ac3eb1ae5295ff474672af22641485cb..6c148ccea67440cb148b517d9b7e9a75ad0bb744 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index a1c1d95763934eec5fc0c9ad2fe14b1347cf88ef..8b21449a70920ee72310f18ec1136ed89e939d3f 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -86,16 +86,6 @@ static void __init init_vec_parent_group(void) acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse); } -static int __init get_ipi_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_IPI); - - return -EINVAL; -} - #ifdef CONFIG_HOTPLUG_CPU static void handle_irq_affinity(void) { @@ -135,10 +125,6 @@ void fixup_irqs(void) void __init init_IRQ(void) { int i, ret; -#ifdef CONFIG_SMP - int r, ipi_irq; - static int ipi_dummy_dev; -#endif unsigned int order = get_order(IRQ_STACK_SIZE); struct page *page; @@ -154,13 +140,7 @@ void __init init_IRQ(void) irqchip_init(); } #ifdef CONFIG_SMP - ipi_irq = get_ipi_irq(); - if (ipi_irq < 0) - panic("IPI IRQ mapping failed\n"); - irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); - if (r < 0) - panic("IPI IRQ request failed\n"); + smp_ops.init_ipi(); #endif for (i = 0; i < NR_IRQS; i++) @@ -174,5 +154,5 @@ void __init init_IRQ(void) per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); } - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); } diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c new file mode 100644 index 0000000000000000000000000000000000000000..56182c64ab38ebfd7b218a45e45aa725a0a9d48e --- /dev/null +++ b/arch/loongarch/kernel/paravirt.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +static int has_steal_clock; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 para_steal_clock(int cpu) +{ + u64 steal; + struct kvm_steal_time *src; + int version; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + /* Make sure that the version is read before the steal */ + virt_rmb(); + steal = src->steal; + /* Make sure that the steal is read before the next version */ + virt_rmb(); + + } while ((version & 1) || (version != src->version)); + return steal; +} + +static int pv_register_steal_time(void) +{ + int cpu = smp_processor_id(); + struct kvm_steal_time *st; + unsigned long addr; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + return 0; +} + +#ifdef CONFIG_SMP +static void pv_send_ipi_single(int cpu, unsigned int action) +{ + unsigned int min, old; + irq_cpustat_t *info = &per_cpu(irq_stat, cpu); + + old = atomic_fetch_or(BIT(action), &info->message); + if (old) + return; + + min = cpu_logical_map(cpu); + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min); +} + +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned int cpu, i, min = 0, max = 0, old; + __uint128_t bitmap = 0; + irq_cpustat_t *info; + + if (cpumask_empty(mask)) + return; + + action = BIT(action); + for_each_cpu(i, mask) { + info = &per_cpu(irq_stat, i); + old = atomic_fetch_or(action, &info->message); + if (old) + continue; + + cpu = cpu_logical_map(i); + if (!bitmap) { + min = max = cpu; + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) { + max = cpu > max ? cpu : max; + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) { + bitmap <<= min - cpu; + min = cpu; + } else { + /* + * Physical cpuid is sorted in ascending order ascend + * for the next mask calculation, send IPI here + * directly and skip the remainding cpus + */ + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, + (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); + min = max = cpu; + bitmap = 0; + } + __set_bit(cpu - min, (unsigned long *)&bitmap); + } + + if (bitmap) + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); +} + +static irqreturn_t loongson_do_swi(int irq, void *dev) +{ + irq_cpustat_t *info; + long action; + + /* Clear swi interrupt */ + clear_csr_estat(1 << INT_SWI0); + info = this_cpu_ptr(&irq_stat); + action = atomic_xchg(&info->message, 0); + if (action & SMP_CALL_FUNCTION) { + generic_smp_call_function_interrupt(); + info->ipi_irqs[IPI_CALL_FUNCTION]++; + } + + if (action & SMP_RESCHEDULE) { + scheduler_ipi(); + info->ipi_irqs[IPI_RESCHEDULE]++; + } + + return IRQ_HANDLED; +} + +static void pv_init_ipi(void) +{ + int r, swi0; + + swi0 = get_percpu_irq(INT_SWI0); + if (swi0 < 0) + panic("SWI0 IRQ mapping failed\n"); + irq_set_percpu_devid(swi0); + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat); + if (r < 0) + panic("SWI0 IRQ request failed\n"); +} + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); +} + +static int pv_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_register_steal_time(); + local_irq_restore(flags); + return 0; +} + +static int pv_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + return 0; +} +#endif + +static bool kvm_para_available(void) +{ + static int hypervisor_type; + int config; + + if (!hypervisor_type) { + config = read_cpucfg(CPUCFG_KVM_SIG); + if (!memcmp(&config, KVM_SIGNATURE, 4)) + hypervisor_type = HYPERVISOR_KVM; + } + + return hypervisor_type == HYPERVISOR_KVM; +} + +int __init pv_ipi_init(void) +{ + int feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + /* + * check whether KVM hypervisor supports pv_ipi or not + */ + feature = read_cpucfg(CPUCFG_KVM_FEATURE); +#ifdef CONFIG_SMP + if (feature & KVM_FEATURE_PV_IPI) { + smp_ops.init_ipi = pv_init_ipi; + smp_ops.send_ipi_single = pv_send_ipi_single; + smp_ops.send_ipi_mask = pv_send_ipi_mask; + } +#endif + + return 1; +} + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, + void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + if (pv_register_steal_time()) { + has_steal_clock = 0; + return 0; + } + + register_reboot_notifier(&pv_reboot_nb); + static_call_update(pv_steal_clock, para_steal_clock); + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + +#ifdef CONFIG_SMP + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "loongarch/pv:online", + pv_cpu_online, pv_cpu_down_prepare) < 0) + pr_err("Failed to install cpu hotplug callbacks\n"); +#endif + pr_info("Using stolen time PV\n"); + return 0; +} diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 0491bf453cd49601c4f8b7b35565ea4a2b83c689..3265c8f33223fc9da4a1deb38dae70d411e83b38 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu) static DEFINE_MUTEX(pmu_reserve_mutex); static atomic_t active_events = ATOMIC_INIT(0); -static int get_pmc_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_PCOV); - - return -EINVAL; -} - static void reset_counters(void *arg); static int __hw_perf_event_init(struct perf_event *event); @@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { on_each_cpu(reset_counters, NULL, 1); - free_irq(get_pmc_irq(), &loongarch_pmu); + free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu); mutex_unlock(&pmu_reserve_mutex); } } @@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event) if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; - irq = get_pmc_irq(); + irq = get_percpu_irq(INT_PCOV); flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; if (!atomic_inc_not_zero(&active_events)) { mutex_lock(&pmu_reserve_mutex); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5fbdd172a2488953965e69ed6dc4d49df14eac70..897127c2638846e07a137755ce0f524facec2b73 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -67,11 +68,6 @@ static cpumask_t cpu_core_setup_map; struct secondary_data cpuboot_data; static DEFINE_PER_CPU(int, cpu_state); -enum ipi_msg_type { - IPI_RESCHEDULE, - IPI_CALL_FUNCTION, -}; - static const char *ipi_types[NR_IPI] __tracepoint_string = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNCTION] = "Function call interrupts", @@ -191,24 +187,19 @@ static u32 ipi_read_clear(int cpu) static void ipi_write_action(int cpu, u32 action) { - unsigned int irq = 0; - - while ((irq = ffs(action))) { - uint32_t val = IOCSR_IPI_SEND_BLOCKING; + uint32_t val; - val |= (irq - 1); - val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); - iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND); - action &= ~BIT(irq - 1); - } + val = IOCSR_IPI_SEND_BLOCKING | action; + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); + iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND); } -void loongson_send_ipi_single(int cpu, unsigned int action) +static void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) +static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -216,6 +207,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } +void arch_send_call_function_single_ipi(int cpu) +{ + smp_ops.send_ipi_single(cpu, ACTION_CALL_FUNCTION); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + smp_ops.send_ipi_mask(mask, ACTION_CALL_FUNCTION); +} + /* * This function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing @@ -223,11 +224,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) */ void arch_smp_send_reschedule(int cpu) { - loongson_send_ipi_single(cpu, SMP_RESCHEDULE); + smp_ops.send_ipi_single(cpu, ACTION_RESCHEDULE); } EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); -irqreturn_t loongson_ipi_interrupt(int irq, void *dev) +static irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -247,6 +248,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } +static void loongson_init_ipi(void) +{ + int r, ipi_irq; + + ipi_irq = get_percpu_irq(INT_IPI); + if (ipi_irq < 0) + panic("IPI IRQ mapping failed\n"); + + irq_set_percpu_devid(ipi_irq); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat); + if (r < 0) + panic("IPI IRQ request failed\n"); +} + +struct smp_ops smp_ops = { + .init_ipi = loongson_init_ipi, + .send_ipi_single = loongson_send_ipi_single, + .send_ipi_mask = loongson_send_ipi_mask, +}; + static void __init fdt_smp_setup(void) { #ifdef CONFIG_OF @@ -287,6 +308,7 @@ void __init loongson_smp_setup(void) cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; + pv_ipi_init(); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } @@ -322,7 +344,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, ACTION_BOOT_CPU); } /* @@ -331,7 +353,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle) void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER; change_csr_ecfg(ECFG0_IM, imask); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index e7015f7b70e37c4cabf736512c50a998455bbdf9..46d7d40c87e38e097af74385be1e518bf95d5251 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -15,6 +15,7 @@ #include #include +#include #include u64 cpu_clock_freq; @@ -123,16 +124,6 @@ void sync_counter(void) csr_write64(init_offset, LOONGARCH_CSR_CNTC); } -static int get_timer_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_TI); - - return -EINVAL; -} - int constant_clockevent_init(void) { unsigned int cpu = smp_processor_id(); @@ -142,7 +133,7 @@ int constant_clockevent_init(void) static int irq = 0, timer_irq_installed = 0; if (!timer_irq_installed) { - irq = get_timer_irq(); + irq = get_percpu_irq(INT_TI); if (irq < 0) pr_err("Failed to map irq %d (timer)\n", irq); } @@ -224,4 +215,5 @@ void __init time_init(void) constant_clockevent_init(); constant_clocksource_init(); + pv_time_init(); } diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index ed1d89d53e2e6da0c8b73ed17d97146721347d16..8affc6d4a66e6973473673fb176b39064b53287c 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -83,6 +83,13 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) rj = inst.reg2csr_format.rj; csrid = inst.reg2csr_format.csr; + if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= LOONGARCH_CSR_PERFCNTR3) { + if (!kvm_own_pmu(vcpu)) { + vcpu->arch.pc -= 4; + return EMULATE_DONE; + } + } + /* Process CSR ops */ switch (rj) { case 0: /* process csrrd */ @@ -206,10 +213,59 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) +static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) { int rd, rj; - unsigned int index; + unsigned int index, ret; + unsigned long plv; + + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + + /* + * By LoongArch Reference Manual 2.2.10.5 + * Return value is 0 for undefined cpucfg index + * + * Disable preemption since hw gcsr is accessed + */ + preempt_disable(); + plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT; + switch (index) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; + break; + case CPUCFG_KVM_SIG: + /* + * Cpucfg emulation between 0x40000000 -- 0x400000ff + * Return value with 0 if executed in user mode + */ + if ((plv & CSR_CRMD_PLV) == PLV_KERN) + vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; + else + vcpu->arch.gprs[rd] = 0; + break; + case CPUCFG_KVM_FEATURE: + ret = 0; + if ((plv & CSR_CRMD_PLV) == PLV_KERN) { + ret = KVM_FEATURE_PV_IPI; + if (sched_info_on()) + ret |= KVM_FEATURE_STEAL_TIME; + } + vcpu->arch.gprs[rd] = ret; + break; + default: + vcpu->arch.gprs[rd] = 0; + break; + } + + preempt_enable(); + return EMULATE_DONE; +} + +static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) +{ unsigned long curr_pc; larch_inst inst; enum emulation_result er = EMULATE_DONE; @@ -224,21 +280,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) er = EMULATE_FAIL; switch (((inst.word >> 24) & 0xff)) { case 0x0: /* CPUCFG GSPR */ - if (inst.reg2_format.opcode == 0x1B) { - rd = inst.reg2_format.rd; - rj = inst.reg2_format.rj; - ++vcpu->stat.cpucfg_exits; - index = vcpu->arch.gprs[rj]; - er = EMULATE_DONE; - /* - * By LoongArch Reference Manual 2.2.10.5 - * return value is 0 for undefined cpucfg index - */ - if (index < KVM_MAX_CPUCFG_REGS) - vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; - else - vcpu->arch.gprs[rd] = 0; - } + if (inst.reg2_format.opcode == cpucfg_op) + er = kvm_emu_cpucfg(vcpu, inst); break; case 0x4: /* CSR{RD,WR,XCHG} GSPR */ er = kvm_handle_csr(vcpu, inst); @@ -685,6 +728,115 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu) +{ + unsigned long ipi_bitmap; + unsigned int min, cpu, i; + struct kvm_vcpu *dest; + + min = vcpu->arch.gprs[LOONGARCH_GPR_A3]; + for (i = 0; i < 2; i++, min += BITS_PER_LONG) { + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i]; + if (!ipi_bitmap) + continue; + + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG); + while (cpu < BITS_PER_LONG) { + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min); + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, + cpu + 1); + if (!dest) + continue; + + /* + * Send SWI0 to dest vcpu to emulate IPI interrupt + */ + kvm_queue_irq(dest, INT_SWI0); + kvm_vcpu_kick(dest); + } + } + + return 0; +} + +static int kvm_save_notify(struct kvm_vcpu *vcpu) +{ + unsigned long id, data; + + id = vcpu->arch.gprs[LOONGARCH_GPR_A1]; + data = vcpu->arch.gprs[LOONGARCH_GPR_A2]; + switch (id) { + case KVM_FEATURE_STEAL_TIME: + vcpu->arch.st.guest_addr = data; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + break; + default: + break; + }; + + return 0; +}; + +/* + * hypercall emulation always return to guest, Caller should check retval. + */ +static void kvm_handle_pv_service(struct kvm_vcpu *vcpu) +{ + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0]; + long ret; + + switch (func) { + case KVM_HCALL_FUNC_PV_IPI: + kvm_pv_send_ipi(vcpu); + ret = KVM_HCALL_STATUS_SUCCESS; + break; + case KVM_HCALL_FUNC_NOTIFY: + ret = kvm_save_notify(vcpu); + break; + default: + ret = KVM_HCALL_INVALID_CODE; + break; + }; + + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret; +} + +static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) +{ + larch_inst inst; + unsigned int code; + int ret; + + inst.word = vcpu->arch.badi; + code = inst.reg0i15_format.immediate; + ret = RESUME_GUEST; + + switch (code) { + case KVM_HCALL_PV_SERVICE: + vcpu->stat.hypercall_exits++; + kvm_handle_pv_service(vcpu); + break; + case KVM_HCALL_SWDBG: + /* KVM_HC_SWDBG only in effective when SW_BP is enabled */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + ret = RESUME_HOST; + } else + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; + break; + default: + /* Treat it as noop intruction, only set return value */ + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; + break; + } + + if (ret == RESUME_GUEST) + update_pc(&vcpu->arch); + + return ret; +} + /* * LoongArch KVM callback handling for unimplemented guest exiting */ @@ -716,6 +868,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, + [EXCCODE_HVC] = kvm_handle_hypercall, }; int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 111328f6087285a01ccf4077672cf4cc85266866..bcc6b6d063d914dbf820b43f2c1308803646b395 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -23,24 +23,6 @@ static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick) return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz); } -/* - * Push timer forward on timeout. - * Handle an hrtimer event by push the hrtimer forward a period. - */ -static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu) -{ - unsigned long cfg, period; - - /* Add periodic tick to current expire time */ - cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG); - if (cfg & CSR_TCFG_PERIOD) { - period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL); - hrtimer_add_expires_ns(&vcpu->arch.swtimer, period); - return HRTIMER_RESTART; - } else - return HRTIMER_NORESTART; -} - /* Low level hrtimer wake routine */ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer) { @@ -50,7 +32,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer) kvm_queue_irq(vcpu, INT_TI); rcuwait_wake_up(&vcpu->wait); - return kvm_count_timeout(vcpu); + return HRTIMER_NORESTART; } /* @@ -93,7 +75,8 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) /* * Freeze the soft-timer and sync the guest stable timer with it. */ - hrtimer_cancel(&vcpu->arch.swtimer); + if (kvm_vcpu_is_blocking(vcpu)) + hrtimer_cancel(&vcpu->arch.swtimer); /* * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 @@ -168,26 +151,20 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) * Here judge one-shot timer fired by checking whether TVAL is larger * than TCFG */ - if (ticks < cfg) { + if (ticks < cfg) delta = tick_to_ns(vcpu, ticks); - expire = ktime_add_ns(ktime_get(), delta); - vcpu->arch.expire = expire; + else + delta = 0; + + expire = ktime_add_ns(ktime_get(), delta); + vcpu->arch.expire = expire; + if (kvm_vcpu_is_blocking(vcpu)) { /* * HRTIMER_MODE_PINNED is suggested since vcpu may run in * the same physical cpu in next time */ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); - } else if (vcpu->stat.generic.blocking) { - /* - * Inject timer interrupt so that halt polling can dectect and exit. - * VCPU is scheduled out already and sleeps in rcuwait queue and - * will not poll pending events again. kvm_queue_irq() is not enough, - * hrtimer swtimer should be used here. - */ - expire = ktime_add_ns(ktime_get(), 10); - vcpu->arch.expire = expire; - hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); } } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 36106922b5d75b7f7de70df5df0d72a697440f0f..685f2826d022f1d1e3148e530219db6e4a240f3b 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, idle_exits), STATS_DESC_COUNTER(VCPU, cpucfg_exits), STATS_DESC_COUNTER(VCPU, signal_exits), + STATS_DESC_COUNTER(VCPU, hypercall_exits) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -30,6 +31,115 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + struct kvm_steal_time __user *st; + struct gfn_to_hva_cache *ghc; + struct kvm_memslots *slots; + gpa_t gpa; + u64 steal; + u32 version; + + ghc = &vcpu->arch.st.cache; + gpa = vcpu->arch.st.guest_addr; + if (!(gpa & KVM_STEAL_PHYS_VALID)) + return; + + gpa &= KVM_STEAL_PHYS_MASK; + slots = kvm_memslots(vcpu->kvm); + if (slots->generation != ghc->generation || gpa != ghc->gpa) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, + sizeof(*st))) { + ghc->gpa = INVALID_GPA; + return; + } + } + + st = (struct kvm_steal_time __user *)ghc->hva; + unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; + version += 1; + unsafe_put_user(version, &st->version, out); + /* Make sure st->version is written first */ + smp_wmb(); + + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - + vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); + + /* Make sure st->steal is written first */ + smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); +out: + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); +} + +static bool kvm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + u64 gpa; + int ret = 0; + int idx; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + if (get_user(gpa, user)) + return -EFAULT; + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) + vcpu->arch.st.guest_addr = gpa; + + return ret; +} + +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + u64 gpa; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + gpa = vcpu->arch.st.guest_addr; + if (put_user(gpa, user)) + return -EFAULT; + + return 0; +} + +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_LOONGARCH_VCPU_PVTIME_GPA: + if (kvm_pvtime_supported()) + return 0; + } + + return -ENXIO; +} + /* * kvm_check_requests - check and handle pending vCPU requests * @@ -47,6 +157,9 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return RESUME_HOST; + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); + return RESUME_GUEST; } @@ -247,7 +360,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) + vcpu->guest_debug = dbg->control; + else + vcpu->guest_debug = 0; + + return 0; } static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) @@ -274,6 +395,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) return 0; } +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val) +{ + int cpuid; + struct loongarch_csrs *csr = vcpu->arch.csr; + struct kvm_phyid_map *map; + + if (val >= KVM_MAX_PHYID) + return -EINVAL; + + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); + map = vcpu->kvm->arch.phyid_map; + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if (map->phys_map[cpuid].enabled) { + /* + * Cpuid is already set before + * Forbid changing different cpuid at runtime + */ + if (cpuid != val) { + /* + * Cpuid 0 is initial value for vcpu, maybe invalid + * unset value for vcpu + */ + if (cpuid) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + } else { + /* Discard duplicated cpuid set */ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + } + + if (map->phys_map[val].enabled) { + /* + * New cpuid is already set with other vcpu + * Forbid sharing the same cpuid between different vcpus + */ + if (map->phys_map[val].vcpu != vcpu) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + /* Discard duplicated cpuid set operation*/ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val); + map->phys_map[val].enabled = true; + map->phys_map[val].vcpu = vcpu; + if (map->max_phyid < val) + map->max_phyid = val; + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; +} + +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) +{ + struct kvm_phyid_map *map; + + if (cpuid >= KVM_MAX_PHYID) + return NULL; + + map = kvm->arch.phyid_map; + if (map->phys_map[cpuid].enabled) + return map->phys_map[cpuid].vcpu; + + return NULL; +} + +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +{ + int cpuid; + struct loongarch_csrs *csr = vcpu->arch.csr; + struct kvm_phyid_map *map; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); + if (cpuid >= KVM_MAX_PHYID) + return; + + if (map->phys_map[cpuid].enabled) { + map->phys_map[cpuid].vcpu = NULL; + map->phys_map[cpuid].enabled = false; + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0); + } +} + static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) { int ret = 0, gintc; @@ -291,7 +501,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc); return ret; - } + } else if (id == LOONGARCH_CSR_CPUID) + return kvm_set_cpuid(vcpu, val); kvm_write_sw_gcsr(csr, id, val); @@ -304,11 +515,18 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) return -EINVAL; switch (id) { - case 2: + case LOONGARCH_CPUCFG0: + *v = GENMASK(31, 0); + return 0; + case LOONGARCH_CPUCFG1: + /* CPUCFG1_MSGINT is not supported by KVM */ + *v = GENMASK(25, 0); + return 0; + case LOONGARCH_CPUCFG2: /* CPUCFG2 features unconditionally supported by KVM */ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | - CPUCFG2_LAM; + CPUCFG2_LSPW | CPUCFG2_LAM; /* * For the ISA extensions listed below, if one is supported * by the host, then it is also supported by KVM. @@ -318,21 +536,39 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) if (cpu_has_lasx) *v |= CPUCFG2_LASX; + return 0; + case LOONGARCH_CPUCFG3: + *v = GENMASK(16, 0); + return 0; + case LOONGARCH_CPUCFG4: + case LOONGARCH_CPUCFG5: + *v = GENMASK(31, 0); + return 0; + case LOONGARCH_CPUCFG6: + if (cpu_has_pmp) + *v = GENMASK(14, 0); + else + *v = 0; + return 0; + case LOONGARCH_CPUCFG16: + *v = GENMASK(16, 0); + return 0; + case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20: + *v = GENMASK(30, 0); return 0; default: /* - * No restrictions on other valid CPUCFG IDs' values, but - * CPUCFG data is limited to 32 bits as the LoongArch ISA - * manual says (Volume 1, Section 2.2.10.5 "CPUCFG"). + * CPUCFG bits should be zero if reserved by HW or not + * supported by KVM. */ - *v = U32_MAX; + *v = 0; return 0; } } static int kvm_check_cpucfg(int id, u64 val) { - int ret; + int ret, host; u64 mask = 0; ret = _kvm_get_cpucfg_mask(id, &mask); @@ -344,7 +580,7 @@ static int kvm_check_cpucfg(int id, u64 val) return -EINVAL; switch (id) { - case 2: + case LOONGARCH_CPUCFG2: if (!(val & CPUCFG2_LLFTP)) /* Guests must have a constant timer */ return -EINVAL; @@ -358,6 +594,18 @@ static int kvm_check_cpucfg(int id, u64 val) /* LASX architecturally implies LSX and FP but val does not satisfy that */ return -EINVAL; return 0; + case LOONGARCH_CPUCFG6: + if (val & CPUCFG6_PMP) { + host = read_cpucfg(6); + if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) + /* Guest pmbits must be the same with host */ + return -EINVAL; + if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM)) + return -EINVAL; + if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM)) + return -EINVAL; + } + return 0; default: /* * Values for the other CPUCFG IDs are not being further validated @@ -390,6 +638,9 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_LOONGARCH_COUNTER: *v = drdtime() + vcpu->kvm->arch.time_offset; break; + case KVM_REG_LOONGARCH_DEBUG_INST: + *v = INSN_HVCL + KVM_HCALL_SWDBG; + break; default: ret = -EINVAL; break; @@ -534,6 +785,7 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, { switch (attr->attr) { case 2: + case 6: return 0; default: return -ENXIO; @@ -551,6 +803,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_has_attr(vcpu, attr); + break; default: break; } @@ -583,6 +838,9 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_get_attr(vcpu, attr); + break; default: break; } @@ -605,6 +863,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_set_attr(vcpu, attr); + break; default: break; } @@ -825,6 +1086,77 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } +int kvm_own_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + + if (!kvm_guest_has_pmu(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + + vcpu->arch.aux_inuse |= KVM_LARCH_PERF; + preempt_enable(); + return 0; +} + +static void kvm_lose_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) + return; + + /* save guest pmu csr */ + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL0, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL1, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL2, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL3, 0); + /* Disable pmu access from guest */ + write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); + + if (((kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3)) + & KVM_PMU_PLV_ENABLE) == 0) + vcpu->arch.aux_inuse &= ~KVM_LARCH_PERF; +} + +static void kvm_restore_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) + return; + + /* Set PM0-PM(num) to Guest */ + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); +} + + int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { int intr = (int)irq->irq; @@ -924,6 +1256,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.swtimer); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kfree(vcpu->arch.csr); + kvm_drop_cpuid(vcpu); /* * If the vCPU is freed and reused as another vCPU, we don't want the @@ -963,6 +1296,11 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + /* Restore hardware perf csr */ + kvm_restore_pmu(vcpu); + + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; @@ -1046,6 +1384,7 @@ static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) struct loongarch_csrs *csr = vcpu->arch.csr; kvm_lose_fpu(vcpu); + kvm_lose_pmu(vcpu); /* * Update CSR state from hardware if software CSR state is stale, diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 0a37f6fa8f2df03f444dd089ef44fa6efc512179..06fd746b03b6193b9560a0fce85ef9db5fbcb8a1 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.pgd) return -ENOMEM; + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), + GFP_KERNEL_ACCOUNT); + if (!kvm->arch.phyid_map) { + free_page((unsigned long)kvm->arch.pgd); + kvm->arch.pgd = NULL; + return -ENOMEM; + } + kvm_init_vmcs(kvm); kvm->arch.gpa_size = BIT(cpu_vabits - 1); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) for (i = 0; i <= kvm->arch.root_level; i++) kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3); + spin_lock_init(&kvm->arch.phyid_map_lock); return 0; } @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_destroy_vcpus(kvm); free_page((unsigned long)kvm->arch.pgd); + kvfree(kvm->arch.phyid_map); kvm->arch.pgd = NULL; + kvm->arch.phyid_map = NULL; } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -66,6 +77,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index b66f1ce9ce0a3fcb6f8c2d67ce35970c4cb08eff..c5c26b8e8d0c5cd6ca29d3ad9928f94d33ab3b48 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -23,6 +23,16 @@ #define EIOINTC_REG_ISR 0x1800 #define EIOINTC_REG_ROUTE 0x1c00 +#define EXTIOI_VIRT_FEATURES 0x40000000 +#define EXTIOI_HAS_VIRT_EXTENSION 0 +#define EXTIOI_HAS_ENABLE_OPTION 1 +#define EXTIOI_HAS_INT_ENCODE 2 +#define EXTIOI_HAS_CPU_ENCODE 3 +#define EXTIOI_VIRT_CONFIG 0x40000004 +#define EXTIOI_ENABLE 1 +#define EXTIOI_ENABLE_INT_ENCODE 2 +#define EXTIOI_ENABLE_CPU_ENCODE 3 + #define VEC_REG_COUNT 4 #define VEC_COUNT_PER_REG 64 #define VEC_COUNT (VEC_REG_COUNT * VEC_COUNT_PER_REG) @@ -41,6 +51,7 @@ struct eiointc_priv { cpumask_t cpuspan_map; struct fwnode_handle *domain_handle; struct irq_domain *eiointc_domain; + bool cpu_encoded; }; static struct eiointc_priv *eiointc_priv[MAX_IO_PICS]; @@ -91,7 +102,16 @@ static DEFINE_RAW_SPINLOCK(affinity_lock); static void virt_extioi_set_irq_route(int irq, unsigned int cpu) { - iocsr_write8(cpu_logical_map(cpu), EIOINTC_REG_ROUTE + irq); + int data; + + /* + * get irq route info for continuous 4 vectors + * and set affinity for specified vector + */ + data = iocsr_read32(EIOINTC_REG_ROUTE + (irq & ~3)); + data &= ~(0xff << ((irq & 3) * 8)); + data |= cpu_logical_map(cpu) << ((irq & 3) * 8); + iocsr_write32(data, EIOINTC_REG_ROUTE + (irq & ~3)); } static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) @@ -116,7 +136,7 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af vector = d->hwirq; regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); - if (cpu_has_hypervisor) { + if (priv->cpu_encoded) { iocsr_write32(EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1F), regaddr); virt_extioi_set_irq_route(vector, cpu); iocsr_write32(EIOINTC_ALL_ENABLE, regaddr); @@ -181,9 +201,10 @@ static int eiointc_router_init(unsigned int cpu) for (i = 0; i < eiointc_priv[0]->vec_count / 4; i++) { /* Route to Node-0 Core-0 */ - if (index == 0) - bit = (cpu_has_hypervisor ? cpu_logical_map(0) - : BIT(cpu_logical_map(0))); + if (eiointc_priv[index]->cpu_encoded) + bit = cpu_logical_map(0); + else if (index == 0) + bit = BIT(cpu_logical_map(0)); else bit = (eiointc_priv[index]->node << 4) | 1; @@ -213,6 +234,12 @@ static void eiointc_irq_dispatch(struct irq_desc *desc) for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) { pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3)); + + /* Skip handling if pending bitmap is zero */ + if (!pending) + continue; + + /* Clear the IRQs */ iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3)); while (pending) { int bit = __ffs(pending); @@ -319,23 +346,7 @@ static int eiointc_suspend(void) static void eiointc_resume(void) { - int i, j; - struct irq_desc *desc; - struct irq_data *irq_data; - eiointc_router_init(0); - - for (i = 0; i < nr_pics; i++) { - for (j = 0; j < eiointc_priv[0]->vec_count; j++) { - desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j); - if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) { - raw_spin_lock(&desc->lock); - irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc)); - eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0); - raw_spin_unlock(&desc->lock); - } - } - } } static struct syscore_ops eiointc_syscore_ops = { @@ -394,7 +405,7 @@ static int __init acpi_cascade_irqdomain_init(void) static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, u64 node_map) { - int i; + int i, val; node_map = node_map ? node_map : -1ULL; for_each_possible_cpu(i) { @@ -414,6 +425,17 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, return -ENOMEM; } + if (cpu_has_hypervisor) { + val = iocsr_read32(EXTIOI_VIRT_FEATURES); + if (val & BIT(EXTIOI_HAS_CPU_ENCODE)) { + val = iocsr_read32(EXTIOI_VIRT_CONFIG); + val |= BIT(EXTIOI_ENABLE_CPU_ENCODE); + iocsr_write32(val, EXTIOI_VIRT_CONFIG); + priv->cpu_encoded = true; + pr_info("loongson-extioi: enable cpu encodig\n"); + } + } + eiointc_priv[nr_pics++] = priv; eiointc_router_init(0); irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);