From d5bd597b9f0cfed7942d47cfb8796ff0204d2646 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Thu, 18 Jan 2024 19:46:37 +0800
Subject: [PATCH 01/17] irqchip/loongson-eiointc: Skip handling if there is no
 pending interrupt

commit 3eece72ded7f ("irqchip/loongson-eiointc: Skip handling if there is
no pending interrupt")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

This is a simple optimization in the interrupt dispatch function
eiointc_irq_dispatch(). The eiointc supports 256 IRQs on the
Loongson-3A5000 and Loongson-2K2000 platforms and 128 IRQs on the
Loongson-2K0500 platform; when an irq happens, the eiointc irq handler
reads the bitmap and finds the pending irqs. So there are several
consecutive iocsr_read64 operations over all the bits to find all pending
irqs. If a pending bitmap is zero, there is no pending irq in that bitmap
range, so handling can be skipped to avoid useless operations such as
clearing the hw ISR.

Signed-off-by: Bibo Mao
Acked-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 drivers/irqchip/irq-loongson-eiointc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index b66f1ce9ce0a..33faa1c35511 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -213,6 +213,12 @@ static void eiointc_irq_dispatch(struct irq_desc *desc)
 
 	for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) {
 		pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3));
+
+		/* Skip handling if pending bitmap is zero */
+		if (!pending)
+			continue;
+
+		/* Clear the IRQs */
 		iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3));
 		while (pending) {
 			int bit = __ffs(pending);
-- 
Gitee

From 648019d1caf01ceda9253bab806bef9618976367 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Thu, 18 Jan 2024 19:52:46 +0800
Subject: [PATCH 02/17] irqchip/loongson-eiointc: Remove explicit interrupt
 affinity restore on resume

commit 83c0708719f7 ("irqchip/loongson-eiointc: Remove explicit interrupt
affinity restore on resume")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

During suspend all CPUs except CPU0 are hot-unplugged and all active
interrupts are migrated to CPU0. On resume eiointc_router_init() affines
all interrupts to CPU0, so the subsequent explicit interrupt affinity
restore is redundant. Remove it.

[ tglx: Rewrote changelog ]

Signed-off-by: Bibo Mao
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240130082722.2912576-4-maobibo@loongson.cn

--------------------------------

During suspend and resume, CPUs except CPU0 can be hot-unplugged and IRQs
will be migrated to CPU0, so it is not necessary to restore the irq
affinity for the eiointc irq controller when the system resumes. This
patch removes the irq affinity restore code from eiointc_resume().
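For reference, the resume path after this change reduces to the following
minimal sketch (names as used in this driver; the full hunk is below):

	static void eiointc_resume(void)
	{
		/* Re-route all vectors to CPU0; no per-IRQ affinity replay */
		eiointc_router_init(0);
	}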
Signed-off-by: Xianglai Li
---
 drivers/irqchip/irq-loongson-eiointc.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 33faa1c35511..6d7fc8a4b60e 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -325,23 +325,7 @@ static int eiointc_suspend(void)
 
 static void eiointc_resume(void)
 {
-	int i, j;
-	struct irq_desc *desc;
-	struct irq_data *irq_data;
-
 	eiointc_router_init(0);
-
-	for (i = 0; i < nr_pics; i++) {
-		for (j = 0; j < eiointc_priv[0]->vec_count; j++) {
-			desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j);
-			if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) {
-				raw_spin_lock(&desc->lock);
-				irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc));
-				eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0);
-				raw_spin_unlock(&desc->lock);
-			}
-		}
-	}
 }
 
 static struct syscore_ops eiointc_syscore_ops = {
-- 
Gitee

From 43a6c9bf2740f491cbafab89e503a10c4cd40a27 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 09:12:13 +0800
Subject: [PATCH 03/17] LoongArch: KVM: Start SW timer only when vcpu is
 blocking

commit 8bc15d02d5fd ("LoongArch: KVM: Start SW timer only when vcpu is
blocking")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

The SW timer is enabled when the vcpu thread is scheduled out; its
purpose is to wake up the vcpu from the blocked queue. If the vcpu thread
is scheduled out but not blocked, for example when it is preempted by
other threads, it is not necessary to enable the SW timer: a preempted
vcpu thread is still on the run queue, and the SW timer only wakes up a
vcpu on the blocking queue, so the SW timer is useless in this situation.

This patch enables the SW timer only when the vcpu is scheduled out and
is blocking.

Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/kvm/timer.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index 111328f60872..b0dafe0611ab 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -93,7 +93,8 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
 	/*
 	 * Freeze the soft-timer and sync the guest stable timer with it.
 	 */
-	hrtimer_cancel(&vcpu->arch.swtimer);
+	if (kvm_vcpu_is_blocking(vcpu))
+		hrtimer_cancel(&vcpu->arch.swtimer);
 
 	/*
 	 * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
@@ -168,26 +169,20 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
 	 * Here judge one-shot timer fired by checking whether TVAL is larger
 	 * than TCFG
 	 */
-	if (ticks < cfg) {
+	if (ticks < cfg)
 		delta = tick_to_ns(vcpu, ticks);
-		expire = ktime_add_ns(ktime_get(), delta);
-		vcpu->arch.expire = expire;
+	else
+		delta = 0;
+
+	expire = ktime_add_ns(ktime_get(), delta);
+	vcpu->arch.expire = expire;
+	if (kvm_vcpu_is_blocking(vcpu)) {
 
 		/*
 		 * HRTIMER_MODE_PINNED is suggested since vcpu may run in
 		 * the same physical cpu in next time
 		 */
 		hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
-	} else if (vcpu->stat.generic.blocking) {
-		/*
-		 * Inject timer interrupt so that halt polling can dectect and exit.
-		 * VCPU is scheduled out already and sleeps in rcuwait queue and
-		 * will not poll pending events again. kvm_queue_irq() is not enough,
-		 * hrtimer swtimer should be used here.
-		 */
-		expire = ktime_add_ns(ktime_get(), 10);
-		vcpu->arch.expire = expire;
-		hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
 	}
 }
-- 
Gitee

From a0e24235a5ee784b6ddce7c943352f83a61b20ad Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 09:12:13 +0800
Subject: [PATCH 04/17] LoongArch: KVM: Do not restart SW timer when it is
 expired

commit f66228053e42 ("LoongArch: KVM: Do not restart SW timer when it is
expired")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

LoongArch VCPUs have their own separate HW timers. The SW timer is used
to wake up a blocked vcpu thread rather than to emulate the HW timer.
When a blocking vcpu is scheduled out, the SW timer is used to wake up
the blocked vcpu thread and inject a timer interrupt. It does not care
whether the guest timer is in periodic mode or one-shot mode, and the SW
timer need not be restarted since the vcpu has already been woken.

This patch does not restart the SW timer when it expires.

Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/kvm/timer.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index b0dafe0611ab..bcc6b6d063d9 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -23,24 +23,6 @@ static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
 	return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
 }
 
-/*
- * Push timer forward on timeout.
- * Handle an hrtimer event by push the hrtimer forward a period.
- */
-static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu)
-{
-	unsigned long cfg, period;
-
-	/* Add periodic tick to current expire time */
-	cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG);
-	if (cfg & CSR_TCFG_PERIOD) {
-		period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL);
-		hrtimer_add_expires_ns(&vcpu->arch.swtimer, period);
-		return HRTIMER_RESTART;
-	} else
-		return HRTIMER_NORESTART;
-}
-
 /* Low level hrtimer wake routine */
 enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
 {
@@ -50,7 +32,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
 	kvm_queue_irq(vcpu, INT_TI);
 	rcuwait_wake_up(&vcpu->wait);
 
-	return kvm_count_timeout(vcpu);
+	return HRTIMER_NORESTART;
 }
 
 /*
-- 
Gitee

From 868f475b229209e584ab57ed0f44dc3d59ccde82 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 09:12:13 +0800
Subject: [PATCH 05/17] LoongArch: KVM: Set reserved bits as zero in CPUCFG

commit aebd3bd586c6 ("LoongArch: KVM: Set reserved bits as zero in CPUCFG")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

Supported CPUCFG information comes from function _kvm_get_cpucfg_mask().
A bit should be zero if it is reserved by HW or if it is not supported by
KVM. Also, the LoongArch software page table walk feature defined in
CPUCFG2_LSPW is supported by KVM, so it should be enabled by default.
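To illustrate how the mask is meant to be consumed, here is a minimal
sketch of validating a user-supplied CPUCFG value against
_kvm_get_cpucfg_mask(); the exact checks live in kvm_check_cpucfg() in
the hunk below, this only shows the general pattern:

	u64 mask;

	if (_kvm_get_cpucfg_mask(id, &mask))
		return -EINVAL;
	/* Bits reserved by HW or unsupported by KVM must be zero */
	if (val & ~mask)
		return -EINVAL;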
Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/kvm/vcpu.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 36106922b5d7..3a8779065f73 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -304,11 +304,18 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
 		return -EINVAL;
 
 	switch (id) {
-	case 2:
+	case LOONGARCH_CPUCFG0:
+		*v = GENMASK(31, 0);
+		return 0;
+	case LOONGARCH_CPUCFG1:
+		/* CPUCFG1_MSGINT is not supported by KVM */
+		*v = GENMASK(25, 0);
+		return 0;
+	case LOONGARCH_CPUCFG2:
 		/* CPUCFG2 features unconditionally supported by KVM */
 		*v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
 		     CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
-		     CPUCFG2_LAM;
+		     CPUCFG2_LSPW | CPUCFG2_LAM;
 		/*
 		 * For the ISA extensions listed below, if one is supported
 		 * by the host, then it is also supported by KVM.
@@ -318,14 +325,26 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
 		if (cpu_has_lasx)
 			*v |= CPUCFG2_LASX;
 
+		return 0;
+	case LOONGARCH_CPUCFG3:
+		*v = GENMASK(16, 0);
+		return 0;
+	case LOONGARCH_CPUCFG4:
+	case LOONGARCH_CPUCFG5:
+		*v = GENMASK(31, 0);
+		return 0;
+	case LOONGARCH_CPUCFG16:
+		*v = GENMASK(16, 0);
+		return 0;
+	case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20:
+		*v = GENMASK(30, 0);
 		return 0;
 	default:
 		/*
-		 * No restrictions on other valid CPUCFG IDs' values, but
-		 * CPUCFG data is limited to 32 bits as the LoongArch ISA
-		 * manual says (Volume 1, Section 2.2.10.5 "CPUCFG").
+		 * CPUCFG bits should be zero if reserved by HW or not
+		 * supported by KVM.
 		 */
-		*v = U32_MAX;
+		*v = 0;
 		return 0;
 	}
 }
@@ -344,7 +363,7 @@ static int kvm_check_cpucfg(int id, u64 val)
 		return -EINVAL;
 
 	switch (id) {
-	case 2:
+	case LOONGARCH_CPUCFG2:
 		if (!(val & CPUCFG2_LLFTP))
 			/* Guests must have a constant timer */
 			return -EINVAL;
-- 
Gitee

From aedd1b3b4fcca57c406c9a50ed154c6492066d12 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Mon, 25 Mar 2024 14:37:44 +0800
Subject: [PATCH 06/17] LoongArch/smp: Refine some ipi functions on LoongArch
 platform

Upstream: no

This is a code refinement of the ipi handling on the LoongArch platform;
there are three modifications:

1. Add a generic function get_percpu_irq(), replacing the percpu irq
functions get_ipi_irq()/get_pmc_irq()/get_timer_irq() with
get_percpu_irq().

2. Change the definition of the action parameter taken by
loongson_send_ipi_single() and loongson_send_ipi_mask(): on the ipi
sender side the action is now passed in plain decimal encoding rather
than as a binary bitmap. The ipi hw sender takes the decimal action code
and converts it into a bit in the ipi message buffer, so the ipi receiver
still sees the binary bitmap encoding (see the sketch after this list).

3. Add structure smp_ops on the LoongArch platform so that pv ipi can be
used later.
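A minimal sketch of the encoding change in item 2, using the constants
introduced in the diff below (the sender writes a decimal action index;
the receiver still reads a bitmap):

	/* Sender side: 'action' is a decimal index, e.g. ACTION_RESCHEDULE == 1 */
	val = IOCSR_IPI_SEND_BLOCKING | action;
	val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
	iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);

	/*
	 * Receiver side: the ipi hw turns the index into a bit in the status
	 * register, so the old bitmap names still apply, e.g.
	 * SMP_RESCHEDULE == BIT(ACTION_RESCHEDULE).
	 */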
Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h     | 11 +++++-
 arch/loongarch/include/asm/smp.h     | 31 +++++++--------
 arch/loongarch/kernel/irq.c          | 22 +----------
 arch/loongarch/kernel/perf_event.c   | 14 +------
 arch/loongarch/kernel/smp.c          | 58 +++++++++++++++++++---------
 arch/loongarch/kernel/time.c         | 12 +-----
 7 files changed, 72 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@ extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+	IPI_RESCHEDULE,
+	IPI_CALL_FUNCTION,
+};
 #define NR_IPI	2
 
 typedef struct {

diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index ed8e72db0dba..85a3315597b6 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -118,9 +118,18 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
 extern void fixup_irqs(void);
 
+static inline int get_percpu_irq(int vector)
+{
+	struct irq_domain *d;
+
+	d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+	if (d)
+		return irq_create_mapping(d, vector);
+
+	return -EINVAL;
+}
 #include <asm-generic/irq.h>
 
 #endif /* _ASM_IRQ_H */

diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..75d30529748c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 
+struct smp_ops {
+	void (*init_ipi)(void);
+	void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+	void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)	cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU		0x1
-#define SMP_RESCHEDULE		0x2
-#define SMP_CALL_FUNCTION	0x4
+#define ACTION_BOOT_CPU		0
+#define ACTION_RESCHEDULE	1
+#define ACTION_CALL_FUNCTION	2
+#define SMP_BOOT_CPU		BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE		BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION	BIT(ACTION_CALL_FUNCTION)
 
 struct secondary_data {
 	unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
 */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-	loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-	loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {

diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index a1c1d9576393..73cd3b7703e2 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -86,16 +86,6 @@ static void __init init_vec_parent_group(void)
 	acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
-static int __init get_ipi_irq(void)
-{
-	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
-	if (d)
-		return irq_create_mapping(d, INT_IPI);
-
-	return -EINVAL;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static void handle_irq_affinity(void)
 {
@@ -135,10 +125,6 @@ void fixup_irqs(void)
 void __init init_IRQ(void)
 {
 	int i, ret;
-#ifdef CONFIG_SMP
-	int r, ipi_irq;
-	static int ipi_dummy_dev;
-#endif
 	unsigned int order = get_order(IRQ_STACK_SIZE);
 	struct page *page;
 
@@ -154,13 +140,7 @@ void __init init_IRQ(void)
 		irqchip_init();
 	}
 #ifdef CONFIG_SMP
-	ipi_irq = get_ipi_irq();
-	if (ipi_irq < 0)
-		panic("IPI IRQ mapping failed\n");
-	irq_set_percpu_devid(ipi_irq);
-	r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
-	if (r < 0)
-		panic("IPI IRQ request failed\n");
+	smp_ops.init_ipi();
 #endif
 
 	for (i = 0; i < NR_IRQS; i++)

diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453cd4..3265c8f33223 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
 static DEFINE_MUTEX(pmu_reserve_mutex);
 static atomic_t active_events = ATOMIC_INIT(0);
 
-static int get_pmc_irq(void)
-{
-	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
-	if (d)
-		return irq_create_mapping(d, INT_PCOV);
-
-	return -EINVAL;
-}
-
 static void reset_counters(void *arg);
 static int __hw_perf_event_init(struct perf_event *event);
 
@@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
 		on_each_cpu(reset_counters, NULL, 1);
-		free_irq(get_pmc_irq(), &loongarch_pmu);
+		free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
 		mutex_unlock(&pmu_reserve_mutex);
 	}
 }
@@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
 	if (event->cpu >= 0 && !cpu_online(event->cpu))
 		return -ENODEV;
 
-	irq = get_pmc_irq();
+	irq = get_percpu_irq(INT_PCOV);
 	flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
 	if (!atomic_inc_not_zero(&active_events)) {
 		mutex_lock(&pmu_reserve_mutex);

diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 5fbdd172a248..9c3557760b5b 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -67,11 +67,6 @@ static cpumask_t cpu_core_setup_map;
 struct secondary_data cpuboot_data;
 static DEFINE_PER_CPU(int, cpu_state);
 
-enum ipi_msg_type {
-	IPI_RESCHEDULE,
-	IPI_CALL_FUNCTION,
-};
-
 static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	[IPI_RESCHEDULE] = "Rescheduling interrupts",
 	[IPI_CALL_FUNCTION] = "Function call interrupts",
@@ -191,24 +186,19 @@ static u32 ipi_read_clear(int cpu)
 
 static void ipi_write_action(int cpu, u32 action)
 {
-	unsigned int irq = 0;
-
-	while ((irq = ffs(action))) {
-		uint32_t val = IOCSR_IPI_SEND_BLOCKING;
+	uint32_t val;
 
-		val |= (irq - 1);
-		val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
-		iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
-		action &= ~BIT(irq - 1);
-	}
+	val = IOCSR_IPI_SEND_BLOCKING | action;
+	val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
+	iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
 }
 
-void loongson_send_ipi_single(int cpu, unsigned int action)
+static void loongson_send_ipi_single(int cpu, unsigned int action)
 {
 	ipi_write_action(cpu_logical_map(cpu), (u32)action);
 }
 
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 {
 	unsigned int i;
 
@@ -216,6 +206,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 		ipi_write_action(cpu_logical_map(i), (u32)action);
 }
 
+void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_ops.send_ipi_single(cpu, ACTION_CALL_FUNCTION);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	smp_ops.send_ipi_mask(mask, ACTION_CALL_FUNCTION);
+}
+
 /*
  * This function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -223,11 +223,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 */
 void arch_smp_send_reschedule(int cpu)
 {
-	loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+	smp_ops.send_ipi_single(cpu, ACTION_RESCHEDULE);
 }
 EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
 
-irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
+static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
 {
 	unsigned int action;
 	unsigned int cpu = smp_processor_id();
@@ -247,6 +247,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+static void loongson_init_ipi(void)
+{
+	int r, ipi_irq;
+
+	ipi_irq = get_percpu_irq(INT_IPI);
+	if (ipi_irq < 0)
+		panic("IPI IRQ mapping failed\n");
+
+	irq_set_percpu_devid(ipi_irq);
+	r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
+	if (r < 0)
+		panic("IPI IRQ request failed\n");
+}
+
+struct smp_ops smp_ops = {
+	.init_ipi		= loongson_init_ipi,
+	.send_ipi_single	= loongson_send_ipi_single,
+	.send_ipi_mask		= loongson_send_ipi_mask,
+};
+
 static void __init fdt_smp_setup(void)
 {
 #ifdef CONFIG_OF
@@ -322,7 +342,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
 
 	csr_mail_send(entry, cpu_logical_map(cpu), 0);
 
-	loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
+	loongson_send_ipi_single(cpu, ACTION_BOOT_CPU);
 }
 
 /*

diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index e7015f7b70e3..fd5354f9be7c 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -123,16 +123,6 @@ void sync_counter(void)
 	csr_write64(init_offset, LOONGARCH_CSR_CNTC);
 }
 
-static int get_timer_irq(void)
-{
-	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
-	if (d)
-		return irq_create_mapping(d, INT_TI);
-
-	return -EINVAL;
-}
-
 int constant_clockevent_init(void)
 {
 	unsigned int cpu = smp_processor_id();
@@ -142,7 +132,7 @@ int constant_clockevent_init(void)
 	static int irq = 0, timer_irq_installed = 0;
 
 	if (!timer_irq_installed) {
-		irq = get_timer_irq();
+		irq = get_percpu_irq(INT_TI);
 		if (irq < 0)
 			pr_err("Failed to map irq %d (timer)\n", irq);
 	}
-- 
Gitee

From 9c6b595420de9c3d6a3691b6c523578a39ecc8ab Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 19 Jan 2024 09:37:28 +0800
Subject: [PATCH 07/17] LoongArch: KVM: Add hypercall instruction emulation
 support

Upstream: no

On the LoongArch system there is a hypercall instruction specifically for
virtualization. When this instruction is executed on the host side, an
illegal instruction exception is reported; when it is executed in VM
mode, it traps into the host instead. When the hypercall is emulated, the
A0 register is set to the value KVM_HCALL_INVALID_CODE rather than
injecting an EXCCODE_INE invalid instruction exception, so the VM can
continue executing the next code.

Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/Kbuild      |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++++++++++++++++++++++++++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c              | 10 ++++++++++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h

diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index 000000000000..d48f993ae206
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_STATUS_SUCCESS	0
+#define KVM_HCALL_INVALID_CODE		-1UL
+#define KVM_HCALL_INVALID_PARAMETER	-2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */

diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..000000000000
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h

diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..923bbca9bd22 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+	update_pc(&vcpu->arch);
+
+	/* Treat it as noop intruction, only set return value */
+	vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+	return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
 	[EXCCODE_LSXDIS]	= kvm_handle_lsx_disabled,
 	[EXCCODE_LASXDIS]	= kvm_handle_lasx_disabled,
 	[EXCCODE_GSPR]		= kvm_handle_gspr,
+	[EXCCODE_HVC]		= kvm_handle_hypercall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
Gitee

From 7ac635c5737e0a0b78024b2aca71b63cba0f051d Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 10:23:16 +0800
Subject: [PATCH 08/17] LoongArch: KVM: Add cpucfg area for kvm hypervisor

Upstream: no

The cpucfg instruction can be used to get processor features, and it
traps when executed in VM mode, so it can also be used to provide cpu
features to the VM. On real hardware cpucfg area 0 - 20 is used.
Here a dedicated area 0x40000000 -- 0x400000ff is used for the KVM
hypervisor to provide PV features, and the area can be extended for other
hypervisors in the future. This area will never be used by real HW; it is
only used by software.

Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/inst.h      |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 +++++
 arch/loongarch/kvm/exit.c              | 59 +++++++++++++++++++-------
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 008a88ead60d..7a3829008c59 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -65,6 +65,7 @@ enum reg2_op {
 	revbd_op	= 0x0f,
 	revh2w_op	= 0x10,
 	revhd_op	= 0x11,
+	cpucfg_op	= 0x1b,
 	iocsrrdb_op	= 0x19200,
 	iocsrrdh_op	= 0x19201,
 	iocsrrdw_op	= 0x19202,

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 67be98049038..2563d9292517 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG	BIT(2)
 #define  CPUCFG48_RAM_CG	BIT(3)
 
+/*
+ * cpucfg index area: 0x40000000 -- 0x400000ff
+ * SW emulation for KVM hypervirsor
+ */
+#define CPUCFG_KVM_BASE		0x40000000UL
+#define CPUCFG_KVM_SIZE		0x100
+#define CPUCFG_KVM_SIG		CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE		"KVM\0"
+#define CPUCFG_KVM_FEATURE	(CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */

diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 923bbca9bd22..a8d3b652d3ea 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,50 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
 	int rd, rj;
 	unsigned int index;
+	unsigned long plv;
+
+	rd = inst.reg2_format.rd;
+	rj = inst.reg2_format.rj;
+	++vcpu->stat.cpucfg_exits;
+	index = vcpu->arch.gprs[rj];
+
+	/*
+	 * By LoongArch Reference Manual 2.2.10.5
+	 * Return value is 0 for undefined cpucfg index
+	 *
+	 * Disable preemption since hw gcsr is accessed
+	 */
+	preempt_disable();
+	plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT;
+	switch (index) {
+	case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+		vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+		break;
+	case CPUCFG_KVM_SIG:
+		/*
+		 * Cpucfg emulation between 0x40000000 -- 0x400000ff
+		 * Return value with 0 if executed in user mode
+		 */
+		if ((plv & CSR_CRMD_PLV) == PLV_KERN)
+			vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+		else
+			vcpu->arch.gprs[rd] = 0;
+		break;
+	default:
+		vcpu->arch.gprs[rd] = 0;
+		break;
+	}
+
+	preempt_enable();
+	return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
 	unsigned long curr_pc;
 	larch_inst inst;
 	enum emulation_result er = EMULATE_DONE;
@@ -224,21 +264,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
 	er = EMULATE_FAIL;
 	switch (((inst.word >> 24) & 0xff)) {
 	case 0x0: /* CPUCFG GSPR */
-		if (inst.reg2_format.opcode == 0x1B) {
-			rd = inst.reg2_format.rd;
-			rj = inst.reg2_format.rj;
-			++vcpu->stat.cpucfg_exits;
-			index = vcpu->arch.gprs[rj];
-			er = EMULATE_DONE;
-			/*
-			 * By LoongArch Reference Manual 2.2.10.5
-			 * return value is 0 for undefined cpucfg index
-			 */
-			if (index < KVM_MAX_CPUCFG_REGS)
-				vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-			else
-				vcpu->arch.gprs[rd] = 0;
-		}
+		if (inst.reg2_format.opcode == cpucfg_op)
+			er = kvm_emu_cpucfg(vcpu, inst);
 		break;
 	case 0x4: /* CSR{RD,WR,XCHG} GSPR */
 		er = kvm_handle_csr(vcpu, inst);
-- 
Gitee

From b67740ccc11f38ed0084f26cb1f2b02c6412de08 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 5 Jan 2024 16:20:34 +0800
Subject: [PATCH 09/17] LoongArch: KVM: Add vcpu search support from physical
 cpuid

Upstream: no

The physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in the
CSR register LOONGARCH_CSR_CPUID; it cannot be changed once the vcpu is
created, and the physical cpuids of two vcpus cannot be the same.

Different irqchips declare different physical cpuid sizes: the max cpuid
value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max cpuid
supported by the IPI hardware is 1024, it is 256 for the extioi irqchip,
and 65536 for the MSI irqchip. The smallest value from all interrupt
controllers is selected now, so the max cpuid size is defined as 256 by
KVM, which comes from the extioi irqchip.

Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c             | 93 ++++++++++++++++++++++++++-
 arch/loongarch/kvm/vm.c               | 11 ++++
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 5bdb34b2c5d6..e5ba021679f4 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS	4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size if 512
+ *  For IPI HW, max dest CPUID size 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID		256
+
+struct kvm_phyid_info {
+	struct kvm_vcpu	*vcpu;
+	bool		enabled;
+};
+
+struct kvm_phyid_map {
+	int max_phyid;
+	struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
 	/* Guest physical mm */
 	kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
 	unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
 	unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
 	unsigned int  root_level;
+	spinlock_t    phyid_map_lock;
+	struct kvm_phyid_map  *phyid_map;
 
 	s64 time_offset;
 	struct kvm_context __percpu *vmcs;

diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling

diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 3a8779065f73..b633fd28b8db 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
 	return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+	int cpuid;
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+	struct kvm_phyid_map *map;
+
+	if (val >= KVM_MAX_PHYID)
+		return -EINVAL;
+
+	cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+	map = vcpu->kvm->arch.phyid_map;
+	spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+	if (map->phys_map[cpuid].enabled) {
+		/*
+		 * Cpuid is already set before
+		 * Forbid changing different cpuid at runtime
+		 */
+		if (cpuid != val) {
+			/*
+			 * Cpuid 0 is initial value for vcpu, maybe invalid
+			 * unset value for vcpu
+			 */
+			if (cpuid) {
+				spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+				return -EINVAL;
+			}
+		} else {
+			/* Discard duplicated cpuid set */
+			spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+			return 0;
+		}
+	}
+
+	if (map->phys_map[val].enabled) {
+		/*
+		 * New cpuid is already set with other vcpu
+		 * Forbid sharing the same cpuid between different vcpus
+		 */
+		if (map->phys_map[val].vcpu != vcpu) {
+			spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+			return -EINVAL;
+		}
+
+		/* Discard duplicated cpuid set operation*/
+		spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+		return 0;
+	}
+
+	kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+	map->phys_map[val].enabled	= true;
+	map->phys_map[val].vcpu		= vcpu;
+	if (map->max_phyid < val)
+		map->max_phyid = val;
+	spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+	return 0;
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+	struct kvm_phyid_map *map;
+
+	if (cpuid >= KVM_MAX_PHYID)
+		return NULL;
+
+	map = kvm->arch.phyid_map;
+	if (map->phys_map[cpuid].enabled)
+		return map->phys_map[cpuid].vcpu;
+
+	return NULL;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+	int cpuid;
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+	struct kvm_phyid_map *map;
+
+	map = vcpu->kvm->arch.phyid_map;
+	cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+	if (cpuid >= KVM_MAX_PHYID)
+		return;
+
+	if (map->phys_map[cpuid].enabled) {
+		map->phys_map[cpuid].vcpu = NULL;
+		map->phys_map[cpuid].enabled = false;
+		kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
+	}
+}
+
 static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
 {
 	int ret = 0, gintc;
@@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
 		kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
 
 		return ret;
-	}
+	} else if (id == LOONGARCH_CSR_CPUID)
+		return kvm_set_cpuid(vcpu, val);
 
 	kvm_write_sw_gcsr(csr, id, val);
 
@@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	hrtimer_cancel(&vcpu->arch.swtimer);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
 	kfree(vcpu->arch.csr);
+	kvm_drop_cpuid(vcpu);
 
 	/*
 	 * If the vCPU is freed and reused as another vCPU, we don't want the

diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 0a37f6fa8f2d..6006a28653ad 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.pgd)
 		return -ENOMEM;
 
+	kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
+				GFP_KERNEL_ACCOUNT);
+	if (!kvm->arch.phyid_map) {
+		free_page((unsigned long)kvm->arch.pgd);
+		kvm->arch.pgd = NULL;
+		return -ENOMEM;
+	}
+
 	kvm_init_vmcs(kvm);
 	kvm->arch.gpa_size = BIT(cpu_vabits - 1);
 	kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
@@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	for (i = 0; i <= kvm->arch.root_level; i++)
 		kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
 
+	spin_lock_init(&kvm->arch.phyid_map_lock);
 	return 0;
 }
 
@@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_destroy_vcpus(kvm);
 	free_page((unsigned long)kvm->arch.pgd);
+	kvfree(kvm->arch.phyid_map);
 	kvm->arch.pgd = NULL;
+	kvm->arch.phyid_map = NULL;
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-- 
Gitee

From 64e8d8138c35aca5eefa735b019d8ec55d44a408 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 1 Mar 2024 18:17:29 +0800
Subject: [PATCH 10/17] LoongArch: KVM: Add pv ipi support on kvm side

Upstream: no

On the LoongArch system the ipi hw uses iocsr registers: there is one
iocsr register access on ipi sending, and two iocsr accesses in the ipi
interrupt handler on ipi receiving. In VM mode every iocsr access causes
the VM to trap into the hypervisor, so one ipi hw notification costs
three traps.

This patch adds PV ipi support for the VM: the hypercall instruction is
used on the ipi sender side, and the hypervisor injects an SWI to the
destination vcpu. The SWI interrupt handler only writes the estat CSR
register to clear the irq, and estat CSR access does not trap into the
hypervisor. So with pv ipi supported there is one trap for the pv ipi
sender and no trap for the ipi receiver: a single trap per ipi
notification.

Also this patch adds ipi multicast support; the method is similar to the
x86 one. With ipi multicast support, an ipi notification can be sent to
at most 128 vcpus at one time. It greatly reduces the number of traps
into the hypervisor.
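To make the calling convention concrete, here is a hedged sketch of the
sender side as the guest uses it in the follow-up patch (kvm_hypercall3()
and KVM_HCALL_FUNC_PV_IPI are introduced by this series; bitmap_low,
bitmap_high and min are illustrative names; the two bitmap words give the
128-vcpu limit):

	/*
	 * Send IPIs to all vcpus whose physical cpuid falls in
	 * [min, min + 127]; a1/a2 carry the low/high 64 bits of the
	 * destination bitmap relative to min.
	 */
	kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap_low, bitmap_high, min);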
Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 130 +++++++++++++++++++++++++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kvm/exit.c              |  76 ++++++++++++++-
 arch/loongarch/kvm/vcpu.c              |   1 +
 5 files changed, 207 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index e5ba021679f4..c31619ae1fb4 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
 	u64 idle_exits;
 	u64 cpucfg_exits;
 	u64 signal_exits;
+	u64 hypercall_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE	(1UL << 0)

diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index d48f993ae206..a82bffbbf8a1 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,16 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM			1
+#define HYPERVISOR_VENDOR_SHIFT		8
+#define HYPERCALL_CODE(vendor, code)	((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HCALL_CODE_PV_SERVICE	0
+#define KVM_HCALL_PV_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI		1
+
 /*
  * LoongArch hypercall return code
  */
@@ -9,6 +19,126 @@
 #define KVM_HCALL_INVALID_CODE		-1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r" (fun)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r" (fun), "r" (a1)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+		unsigned long arg0, unsigned long arg1)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2,
+		unsigned long arg3)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+	register unsigned long a4 asm("a4") = arg3;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2,
+		unsigned long arg3, unsigned long arg4)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+	register unsigned long a4 asm("a4") = arg3;
+	register unsigned long a5 asm("a5") = arg4;
+
+	__asm__ __volatile__(
+		"hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+		: "=r" (ret)
+		: "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+		: "memory"
+		);
+
+	return ret;
+}
+
+
 static inline unsigned int kvm_arch_para_features(void)
 {
 	return 0;

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 2563d9292517..cd58cb042b6a 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -167,6 +167,7 @@
 #define CPUCFG_KVM_SIG		CPUCFG_KVM_BASE
 #define  KVM_SIGNATURE		"KVM\0"
 #define CPUCFG_KVM_FEATURE	(CPUCFG_KVM_BASE + 4)
+#define  KVM_FEATURE_PV_IPI	BIT(1)
 
 #ifndef __ASSEMBLY__
 

diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index a8d3b652d3ea..933879ad0ddc 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -239,6 +239,12 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 		else
 			vcpu->arch.gprs[rd] = 0;
 		break;
+	case CPUCFG_KVM_FEATURE:
+		if ((plv & CSR_CRMD_PLV) == PLV_KERN)
+			vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+		else
+			vcpu->arch.gprs[rd] = 0;
+		break;
 	default:
 		vcpu->arch.gprs[rd] = 0;
 		break;
@@ -712,12 +718,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
+{
+	unsigned long ipi_bitmap;
+	unsigned int min, cpu, i;
+	struct kvm_vcpu *dest;
+
+	min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
+	for (i = 0; i < 2; i++, min += BITS_PER_LONG) {
+		ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
+		if (!ipi_bitmap)
+			continue;
+
+		cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+		while (cpu < BITS_PER_LONG) {
+			dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+			cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
+					cpu + 1);
+			if (!dest)
+				continue;
+
+			/*
+			 * Send SWI0 to dest vcpu to emulate IPI interrupt
+			 */
+			kvm_queue_irq(dest, INT_SWI0);
+			kvm_vcpu_kick(dest);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * hypercall emulation always return to guest, Caller should check retval.
+ */
+static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
+{
+	unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
+	long ret;
+
+	switch (func) {
+	case KVM_HCALL_FUNC_PV_IPI:
+		kvm_pv_send_ipi(vcpu);
+		ret = KVM_HCALL_STATUS_SUCCESS;
+		break;
+	default:
+		ret = KVM_HCALL_INVALID_CODE;
+		break;
+	};
+
+	vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
+}
+
 static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
 {
+	larch_inst inst;
+	unsigned int code;
+
+	inst.word = vcpu->arch.badi;
+	code = inst.reg0i15_format.immediate;
 	update_pc(&vcpu->arch);
 
-	/* Treat it as noop intruction, only set return value */
-	vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+	switch (code) {
+	case KVM_HCALL_PV_SERVICE:
+		vcpu->stat.hypercall_exits++;
+		kvm_handle_pv_service(vcpu);
+		break;
+	default:
+		/* Treat it as noop intruction, only set return value */
+		vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+		break;
+	}
+
 	return RESUME_GUEST;
 }
 

diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index b633fd28b8db..76f2086ab68b 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, idle_exits),
 	STATS_DESC_COUNTER(VCPU, cpucfg_exits),
 	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, hypercall_exits)
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
-- 
Gitee

From e1c81abe2a72da4aa394151dd0f97057d5d18a08 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 1 Mar 2024 11:42:21 +0800
Subject: [PATCH 11/17] LoongArch: Add pv ipi support on guest kernel side

Upstream: no

The PARAVIRT option and pv ipi support are added on the guest kernel
side; function pv_ipi_init() installs the ipi sending and ipi receiving
hooks. It first checks whether the system runs in VM mode; if the kernel
runs in VM mode, it calls kvm_para_available() to detect the current
hypervisor type. Now only KVM detection is supported, and the paravirt
functions can work only if the hypervisor type is KVM, since KVM is the
only hypervisor supported on LoongArch now.

PV IPI uses a virtual IPI sender and a virtual IPI receiver. With the
virtual IPI sender, the ipi message is stored in DDR memory rather than
in emulated HW. IPI multicast is supported, and 128 vcpus can receive
IPIs at the same time, like the x86 KVM method. The hypercall method is
used for IPI sending.

With the virtual IPI receiver, HW SW0 is used rather than the real IPI
HW. Since a VCPU has its own separate HW SW0, just like the HW timer,
there is no trap in the IPI interrupt acknowledge. And since the IPI
message is stored in DDR, there is no trap in getting the IPI message.
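Condensed from the paravirt.c code added below, a sketch of the
send/receive pair this describes (the per-cpu 'message' mailbox lives in
ordinary memory, so only the ESTAT write touches a CSR):

	/* Sender: set the action bit in the destination's mailbox and only
	 * issue the hypercall if the mailbox was previously empty.
	 */
	old = atomic_fetch_or(BIT(action), &per_cpu(irq_stat, cpu).message);
	if (!old)
		kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, cpu_logical_map(cpu));

	/* Receiver (SWI0 handler): ack via ESTAT, then fetch all actions */
	clear_csr_estat(1 << INT_SWI0);
	action = atomic_xchg(&this_cpu_ptr(&irq_stat)->message, 0);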
Signed-off-by: Bibo Mao Signed-off-by: Xianglai Li --- arch/loongarch/Kconfig | 9 ++ arch/loongarch/include/asm/hardirq.h | 1 + arch/loongarch/include/asm/paravirt.h | 27 ++++ .../include/asm/paravirt_api_clock.h | 10 ++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/irq.c | 2 +- arch/loongarch/kernel/paravirt.c | 151 ++++++++++++++++++ arch/loongarch/kernel/smp.c | 4 +- 8 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/include/asm/paravirt.h create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h create mode 100644 arch/loongarch/kernel/paravirt.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bad326ae58f2..7b82992af3c4 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -565,6 +565,15 @@ config CPU_HAS_PREFETCH bool default y +config PARAVIRT + bool "Enable paravirtualization code" + depends on AS_HAS_LVZ_EXTENSION + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + config ARCH_SUPPORTS_KEXEC def_bool y diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index 9f0038e19c7f..b26d596a73aa 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -21,6 +21,7 @@ enum ipi_msg_type { typedef struct { unsigned int ipi_irqs[NR_IPI]; unsigned int __softirq_pending; + atomic_t message ____cacheline_aligned_in_smp; } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h new file mode 100644 index 000000000000..58f7b7b89f2c --- /dev/null +++ b/arch/loongarch/include/asm/paravirt.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_PARAVIRT_H +#define _ASM_LOONGARCH_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +#include +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 dummy_steal_clock(int cpu); +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +int pv_ipi_init(void); +#else +static inline int pv_ipi_init(void) +{ + return 0; +} + +#endif // CONFIG_PARAVIRT +#endif diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..8a418f0b4fd5 --- /dev/null +++ b/arch/loongarch/include/asm/paravirt_api_clock.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ +#ifndef _ASM_API_CLOCK_H +#define _ASM_API_CLOCK_H + +#include + +#endif diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 10ee5fc7ac3e..6c148ccea674 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 73cd3b7703e2..8b21449a7092 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -154,5 +154,5 @@ void 
__init init_IRQ(void) per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); } - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); } diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c new file mode 100644 index 000000000000..9044ed62045c --- /dev/null +++ b/arch/loongarch/kernel/paravirt.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +#ifdef CONFIG_SMP +static void pv_send_ipi_single(int cpu, unsigned int action) +{ + unsigned int min, old; + irq_cpustat_t *info = &per_cpu(irq_stat, cpu); + + old = atomic_fetch_or(BIT(action), &info->message); + if (old) + return; + + min = cpu_logical_map(cpu); + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min); +} + +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned int cpu, i, min = 0, max = 0, old; + __uint128_t bitmap = 0; + irq_cpustat_t *info; + + if (cpumask_empty(mask)) + return; + + action = BIT(action); + for_each_cpu(i, mask) { + info = &per_cpu(irq_stat, i); + old = atomic_fetch_or(action, &info->message); + if (old) + continue; + + cpu = cpu_logical_map(i); + if (!bitmap) { + min = max = cpu; + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) { + max = cpu > max ? cpu : max; + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) { + bitmap <<= min - cpu; + min = cpu; + } else { + /* + * Physical cpuid is sorted in ascending order ascend + * for the next mask calculation, send IPI here + * directly and skip the remainding cpus + */ + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, + (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); + min = max = cpu; + bitmap = 0; + } + __set_bit(cpu - min, (unsigned long *)&bitmap); + } + + if (bitmap) + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); +} + +static irqreturn_t loongson_do_swi(int irq, void *dev) +{ + irq_cpustat_t *info; + long action; + + /* Clear swi interrupt */ + clear_csr_estat(1 << INT_SWI0); + info = this_cpu_ptr(&irq_stat); + action = atomic_xchg(&info->message, 0); + if (action & SMP_CALL_FUNCTION) { + generic_smp_call_function_interrupt(); + info->ipi_irqs[IPI_CALL_FUNCTION]++; + } + + if (action & SMP_RESCHEDULE) { + scheduler_ipi(); + info->ipi_irqs[IPI_RESCHEDULE]++; + } + + return IRQ_HANDLED; +} + +static void pv_init_ipi(void) +{ + int r, swi0; + + swi0 = get_percpu_irq(INT_SWI0); + if (swi0 < 0) + panic("SWI0 IRQ mapping failed\n"); + irq_set_percpu_devid(swi0); + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat); + if (r < 0) + panic("SWI0 IRQ request failed\n"); +} +#endif + +static bool kvm_para_available(void) +{ + static int hypervisor_type; + int config; + + if (!hypervisor_type) { + config = read_cpucfg(CPUCFG_KVM_SIG); + if (!memcmp(&config, KVM_SIGNATURE, 4)) + hypervisor_type = HYPERVISOR_KVM; + } + + return hypervisor_type == HYPERVISOR_KVM; +} + +int __init pv_ipi_init(void) +{ + int feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + /* + * check whether KVM hypervisor supports pv_ipi or not + 
*/ + feature = read_cpucfg(CPUCFG_KVM_FEATURE); +#ifdef CONFIG_SMP + if (feature & KVM_FEATURE_PV_IPI) { + smp_ops.init_ipi = pv_init_ipi; + smp_ops.send_ipi_single = pv_send_ipi_single; + smp_ops.send_ipi_mask = pv_send_ipi_mask; + } +#endif + + return 1; +} diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 9c3557760b5b..897127c26388 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -307,6 +308,7 @@ void __init loongson_smp_setup(void) cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; + pv_ipi_init(); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } @@ -351,7 +353,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle) void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER; change_csr_ecfg(ECFG0_IM, imask); -- Gitee From 109f2a535edc9de3fe870c14815a52918b3bee4d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 2 Mar 2024 15:20:50 +0800 Subject: [PATCH 12/17] Documentation: KVM: Add hypercall for LoongArch Upstream: no Add documentation topic for using pv_virt when running as a guest on KVM hypervisor. Signed-off-by: Bibo Mao Signed-off-by: Xianglai Li --- Documentation/virt/kvm/index.rst | 1 + .../virt/kvm/loongarch/hypercalls.rst | 79 +++++++++++++++++++ Documentation/virt/kvm/loongarch/index.rst | 10 +++ 3 files changed, 90 insertions(+) create mode 100644 Documentation/virt/kvm/loongarch/hypercalls.rst create mode 100644 Documentation/virt/kvm/loongarch/index.rst diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index ad13ec55ddfe..9ca5a45c2140 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -14,6 +14,7 @@ KVM s390/index ppc-pv x86/index + loongarch/index locking vcpu-requests diff --git a/Documentation/virt/kvm/loongarch/hypercalls.rst b/Documentation/virt/kvm/loongarch/hypercalls.rst new file mode 100644 index 000000000000..1679e48d67d2 --- /dev/null +++ b/Documentation/virt/kvm/loongarch/hypercalls.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +The LoongArch paravirtual interface +=================================== + +KVM hypercalls use the HVCL instruction with code 0x100, and the hypercall +number is put in a0 and up to five arguments may be placed in a1-a5, the +return value is placed in v0 (alias with a0). + +The code for that interface can be found in arch/loongarch/kvm/* + +Querying for existence +====================== + +To find out if we're running on KVM or not, cpucfg can be used with index +CPUCFG_KVM_BASE (0x40000000), cpucfg range between 0x40000000 - 0x400000FF +is marked as a specially reserved range. All existing and future processors +will not implement any features in this range. + +When Linux is running on KVM, cpucfg with index CPUCFG_KVM_BASE (0x40000000) +returns magic string "KVM\0" + +Once you determined you're running under a PV capable KVM, you can now use +hypercalls as described below. 
+
+KVM hypercall ABI
+=================
+
+The hypercall ABI on KVM is simple: there is only one scratch register, a0
+(v0), and at most five general-purpose registers (a1-a5) are used as input
+parameters. FP registers and vector registers are not used for input and
+should not be modified during a hypercall. Hypercall functions can be
+inlined since there is only one scratch register.
+
+The parameters are as follows:
+
+  ======== ================ ================
+  Register IN               OUT
+  ======== ================ ================
+  a0       function number  Return code
+  a1       1st parameter    -
+  a2       2nd parameter    -
+  a3       3rd parameter    -
+  a4       4th parameter    -
+  a5       5th parameter    -
+  ======== ================ ================
+
+Return codes can be as follows:
+
+  ==== =========================
+  Code Meaning
+  ==== =========================
+  0    Success
+  -1   Hypercall not implemented
+  -2   Hypercall parameter error
+  ==== =========================
+
+KVM Hypercalls Documentation
+============================
+
+The template for each hypercall is:
+1. Hypercall name
+2. Purpose
+
+1. KVM_HCALL_FUNC_PV_IPI
+------------------------
+
+:Purpose: Send IPIs to multiple vCPUs.
+
+- a0: KVM_HCALL_FUNC_PV_IPI
+- a1: lower part of the bitmap of destination physical CPUIDs
+- a2: higher part of the bitmap of destination physical CPUIDs
+- a3: the lowest physical CPUID in bitmap
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+destinations per hypercall. The destinations are represented by a bitmap
+contained in the first two arguments (a1 and a2). Bit 0 of a1 corresponds
+to the physical CPUID in the third argument (a3), bit 1 corresponds to the
+physical CPUID a3+1, and so on.
diff --git a/Documentation/virt/kvm/loongarch/index.rst b/Documentation/virt/kvm/loongarch/index.rst
new file mode 100644
index 000000000000..83387b4c5345
--- /dev/null
+++ b/Documentation/virt/kvm/loongarch/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+KVM for LoongArch systems
+=========================
+
+.. toctree::
+   :maxdepth: 2
+
+   hypercalls.rst
-- 
Gitee

From a383af58a2389831069948390a36df29a5f251ef Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 11:03:13 +0800
Subject: [PATCH 13/17] LoongArch: KVM: Add software breakpoint support

Upstream: no

When a VM runs in kvm mode, the system does not exit to host mode when the
guest executes a general software breakpoint instruction; the trap
exception happens in guest mode rather than host mode. In order to debug
the guest kernel from the host side, a mechanism is needed to let the VM
exit to host mode. Here a special hypercall code is used for software
breakpoints: the VM exits to host mode, the kvm hypervisor identifies the
special hypercall code, sets exit_reason to KVM_EXIT_DEBUG, and then lets
qemu handle it.
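For illustration, a VMM could consume this roughly as follows (a
hypothetical userspace sketch, not part of this patch; it assumes an open
vCPU fd in vcpu_fd and <linux/kvm.h>, with error handling omitted):

	uint64_t brk_insn;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_LOONGARCH_DEBUG_INST,
		.addr = (uint64_t)&brk_insn,
	};
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);     /* fetch the breakpoint instruction */
	/* ... write brk_insn into guest memory at the breakpoint address ... */
	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg); /* enable SW breakpoints */
	/* a subsequent KVM_RUN exits with exit_reason == KVM_EXIT_DEBUG */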
Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/inst.h     |  1 +
 arch/loongarch/include/asm/kvm_host.h |  2 ++
 arch/loongarch/include/asm/kvm_para.h |  2 ++
 arch/loongarch/include/uapi/asm/kvm.h |  4 ++++
 arch/loongarch/kvm/exit.c             | 16 ++++++++++++++--
 arch/loongarch/kvm/vcpu.c             | 13 ++++++++++++-
 arch/loongarch/kvm/vm.c               |  1 +
 7 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 7a3829008c59..e4c545fecaea 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -12,6 +12,7 @@
 
 #define INSN_NOP		0x03400000
 #define INSN_BREAK		0x002a0000
+#define INSN_HVCL		0x002b8000
 
 #define ADDR_IMMMASK_LU52ID	0xFFF0000000000000
 #define ADDR_IMMMASK_LU32ID	0x000FFFFF00000000
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index c31619ae1fb4..3a92b6b024d5 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -31,6 +31,8 @@
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
 
+#define KVM_GUESTDBG_VALID_MASK	(KVM_GUESTDBG_ENABLE | \
+		KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP)
 struct kvm_vm_stat {
 	struct kvm_vm_stat_generic generic;
 	u64 pages;
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index a82bffbbf8a1..db4579923542 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,8 +9,10 @@
 #define HYPERVISOR_VENDOR_SHIFT		8
 #define HYPERCALL_CODE(vendor, code)	((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
 #define KVM_HCALL_CODE_PV_SERVICE	0
+#define KVM_HCALL_CODE_SWDBG		1
 #define KVM_HCALL_PV_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
 #define KVM_HCALL_FUNC_PV_IPI	1
+#define KVM_HCALL_SWDBG		HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
 /*
  * LoongArch hypercall return code
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index 923d0bd38294..4cec8c16013c 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -15,10 +15,12 @@
  */
 
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_GUEST_DEBUG
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
 /*
  * for KVM_GET_REGS and KVM_SET_REGS
  */
@@ -74,6 +76,8 @@ struct kvm_fpu {
 
 #define KVM_REG_LOONGARCH_COUNTER	(KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1)
 #define KVM_REG_LOONGARCH_VCPU_RESET	(KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2)
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_LOONGARCH_DEBUG_INST	(KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
 
 #define LOONGARCH_REG_SHIFT	3
 #define LOONGARCH_REG_64(TYPE, REG)	(TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 933879ad0ddc..19822813755d 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -774,23 +774,35 @@ static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
 {
 	larch_inst inst;
 	unsigned int code;
+	int ret;
 
 	inst.word = vcpu->arch.badi;
 	code = inst.reg0i15_format.immediate;
-	update_pc(&vcpu->arch);
 
+	ret = RESUME_GUEST;
 	switch (code) {
 	case KVM_HCALL_PV_SERVICE:
 		vcpu->stat.hypercall_exits++;
 		kvm_handle_pv_service(vcpu);
 		break;
+	case KVM_HCALL_SWDBG:
+		/* KVM_HCALL_SWDBG is only effective when SW_BP is enabled */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+			ret = RESUME_HOST;
+		} else
+			vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+		break;
 	default:
 		/* Treat it as a noop instruction, only set return value */
 		vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
 		break;
 	}
 
-	return RESUME_GUEST;
+	if (ret == RESUME_GUEST)
+		update_pc(&vcpu->arch);
+
+	return ret;
 }
 
 /*
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 76f2086ab68b..f22d10228cd2 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -248,7 +248,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
-	return -EINVAL;
+	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+		return -EINVAL;
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE)
+		vcpu->guest_debug = dbg->control;
+	else
+		vcpu->guest_debug = 0;
+
+	return 0;
 }
 
 static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
@@ -500,6 +508,9 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
 		case KVM_REG_LOONGARCH_COUNTER:
 			*v = drdtime() + vcpu->kvm->arch.time_offset;
 			break;
+		case KVM_REG_LOONGARCH_DEBUG_INST:
+			*v = INSN_HVCL + KVM_HCALL_SWDBG;
+			break;
 		default:
 			ret = -EINVAL;
 			break;
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 6006a28653ad..06fd746b03b6 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -77,6 +77,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_IMMEDIATE_EXIT:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_SET_GUEST_DEBUG:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
-- 
Gitee

From 109d98aee4b8865d7136f85ef0977859b9a6d3d0 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 13 Mar 2024 17:48:26 +0800
Subject: [PATCH 14/17] irqchip/loongson-eiointc: Add virt extension support

Upstream: no

With the virt eiointc extension, interrupts can be routed to 256 vCPUs.

Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 drivers/irqchip/irq-loongson-eiointc.c | 44 ++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 6d7fc8a4b60e..c5c26b8e8d0c 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -23,6 +23,16 @@
 #define EIOINTC_REG_ISR			0x1800
 #define EIOINTC_REG_ROUTE		0x1c00
 
+#define EXTIOI_VIRT_FEATURES		0x40000000
+#define EXTIOI_HAS_VIRT_EXTENSION	0
+#define EXTIOI_HAS_ENABLE_OPTION	1
+#define EXTIOI_HAS_INT_ENCODE		2
+#define EXTIOI_HAS_CPU_ENCODE		3
+#define EXTIOI_VIRT_CONFIG		0x40000004
+#define EXTIOI_ENABLE			1
+#define EXTIOI_ENABLE_INT_ENCODE	2
+#define EXTIOI_ENABLE_CPU_ENCODE	3
+
 #define VEC_REG_COUNT			4
 #define VEC_COUNT_PER_REG		64
 #define VEC_COUNT			(VEC_REG_COUNT * VEC_COUNT_PER_REG)
@@ -41,6 +51,7 @@ struct eiointc_priv {
 	cpumask_t		cpuspan_map;
 	struct fwnode_handle	*domain_handle;
 	struct irq_domain	*eiointc_domain;
+	bool			cpu_encoded;
 };
 
 static struct eiointc_priv *eiointc_priv[MAX_IO_PICS];
@@ -91,7 +102,16 @@ static DEFINE_RAW_SPINLOCK(affinity_lock);
 
 static void virt_extioi_set_irq_route(int irq, unsigned int cpu)
 {
-	iocsr_write8(cpu_logical_map(cpu), EIOINTC_REG_ROUTE + irq);
+	int data;
+
+	/*
+	 * Get the irq route info for 4 contiguous vectors
+	 * and set the affinity for the specified vector
+	 */
+	data = iocsr_read32(EIOINTC_REG_ROUTE + (irq & ~3));
+	data &= ~(0xff << ((irq & 3) * 8));
+	data |= cpu_logical_map(cpu) << ((irq & 3) * 8);
+	iocsr_write32(data, EIOINTC_REG_ROUTE + (irq & ~3));
 }
 
 static int
 eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force)
@@ -116,7 +136,7 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af
 	vector = d->hwirq;
 	regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2);
 
-	if (cpu_has_hypervisor) {
+	if (priv->cpu_encoded) {
 		iocsr_write32(EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1F), regaddr);
 		virt_extioi_set_irq_route(vector, cpu);
 		iocsr_write32(EIOINTC_ALL_ENABLE, regaddr);
@@ -181,9 +201,10 @@ static int eiointc_router_init(unsigned int cpu)
 
 	for (i = 0; i < eiointc_priv[0]->vec_count / 4; i++) {
 		/* Route to Node-0 Core-0 */
-		if (index == 0)
-			bit = (cpu_has_hypervisor ? cpu_logical_map(0) : BIT(cpu_logical_map(0)));
+		if (eiointc_priv[index]->cpu_encoded)
+			bit = cpu_logical_map(0);
+		else if (index == 0)
+			bit = BIT(cpu_logical_map(0));
 		else
 			bit = (eiointc_priv[index]->node << 4) | 1;
 
@@ -384,7 +405,7 @@ static int __init acpi_cascade_irqdomain_init(void)
 static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,
 			       u64 node_map)
 {
-	int i;
+	int i, val;
 
 	node_map = node_map ? node_map : -1ULL;
 	for_each_possible_cpu(i) {
@@ -404,6 +425,17 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,
 		return -ENOMEM;
 	}
 
+	if (cpu_has_hypervisor) {
+		val = iocsr_read32(EXTIOI_VIRT_FEATURES);
+		if (val & BIT(EXTIOI_HAS_CPU_ENCODE)) {
+			val = iocsr_read32(EXTIOI_VIRT_CONFIG);
+			val |= BIT(EXTIOI_ENABLE_CPU_ENCODE);
+			iocsr_write32(val, EXTIOI_VIRT_CONFIG);
+			priv->cpu_encoded = true;
+			pr_info("loongson-extioi: enable cpu encoding\n");
+		}
+	}
+
 	eiointc_priv[nr_pics++] = priv;
 	eiointc_router_init(0);
 	irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
-- 
Gitee

From 26cec1db0bfce661b27a0e892b03fba21e95242e Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 22 Mar 2024 16:24:10 +0800
Subject: [PATCH 15/17] LoongArch: KVM: Add steal time support in kvm side

Upstream: no

The steal time feature is added here on the kvm side. A VM can query the
features provided by the KVM hypervisor, and feature KVM_FEATURE_STEAL_TIME
is added here. Like x86, the steal time structure is saved in guest memory,
and a hypercall function KVM_HCALL_FUNC_NOTIFY is added for the guest to
notify KVM to enable the feature.

One vcpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save
and restore the base address of the steal time structure when a VM is
migrated.
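For example, migration save/restore of the steal time base address might
look roughly like this from userspace (a hypothetical sketch, not part of
this patch; vcpu_fd is an open vCPU fd and error handling is omitted):

	uint64_t gpa;
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)&gpa,
	};

	ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr); /* source: save current GPA */
	/* ... transfer gpa along with the rest of the vCPU state ... */
	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr); /* destination: restore it */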
Since it needs hypercall instruction emulation handling, it is dependent
on this patchset:
https://lore.kernel.org/all/20240201031950.3225626-1-maobibo@loongson.cn/

Signed-off-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/exit.c              |  35 ++++++-
 arch/loongarch/kvm/vcpu.c              | 122 +++++++++++++++++++++++++
 6 files changed, 174 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 3a92b6b024d5..778874703483 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_PRIVATE_MEM_SLOTS		0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
+#define KVM_REQ_RECORD_STEAL		KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_VALID_MASK	(KVM_GUESTDBG_ENABLE | \
 		KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP)
@@ -197,6 +198,12 @@ struct kvm_vcpu_arch {
 	struct kvm_mp_state mp_state;
 	/* cpucfg */
 	u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+	/* paravirt steal time */
+	struct {
+		u64 guest_addr;
+		u64 last_steal;
+		struct gfn_to_hva_cache cache;
+	} st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index db4579923542..032101b941d9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -12,6 +12,7 @@
 #define KVM_HCALL_CODE_SWDBG	1
 #define KVM_HCALL_PV_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
 #define KVM_HCALL_FUNC_PV_IPI	1
+#define KVM_HCALL_FUNC_NOTIFY	2
 #define KVM_HCALL_SWDBG		HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
 /*
@@ -21,6 +22,15 @@
 #define KVM_HCALL_INVALID_CODE		-1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+#define KVM_STEAL_PHYS_VALID		BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASK		GENMASK_ULL(63, 6)
+struct kvm_steal_time {
+	__u64 steal;
+	__u32 version;
+	__u32 flags;
+	__u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index cd58cb042b6a..bbfc3579bfcd 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -168,6 +168,7 @@
 #define KVM_SIGNATURE			"KVM\0"
 #define CPUCFG_KVM_FEATURE		(CPUCFG_KVM_BASE + 4)
 #define KVM_FEATURE_PV_IPI		BIT(1)
+#define KVM_FEATURE_STEAL_TIME		BIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index 4cec8c16013c..9891ed93816a 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -83,7 +83,11 @@ struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)	(TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
 #define KVM_IOC_CPUCFG(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG	0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL	1
+#define KVM_LOONGARCH_VCPU_PVTIME_GPA	0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 19822813755d..13a9e58f463b 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -209,7 +209,7 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
 static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
 	int rd, rj;
-	unsigned int index;
+	unsigned int index, ret;
 	unsigned long plv;
 
 	rd = inst.reg2_format.rd;
@@ -240,10 +240,13 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 		vcpu->arch.gprs[rd] = 0;
 		break;
 	case CPUCFG_KVM_FEATURE:
-		if ((plv & CSR_CRMD_PLV) == PLV_KERN)
-			vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
-		else
-			vcpu->arch.gprs[rd] = 0;
+		ret = 0;
+		if ((plv & CSR_CRMD_PLV) == PLV_KERN) {
+			ret = KVM_FEATURE_PV_IPI;
+			if (sched_info_on())
+				ret |= KVM_FEATURE_STEAL_TIME;
+		}
+		vcpu->arch.gprs[rd] = ret;
 		break;
 	default:
 		vcpu->arch.gprs[rd] = 0;
@@ -749,6 +752,25 @@ static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int kvm_save_notify(struct kvm_vcpu *vcpu)
+{
+	unsigned long id, data;
+
+	id = vcpu->arch.gprs[LOONGARCH_GPR_A1];
+	data = vcpu->arch.gprs[LOONGARCH_GPR_A2];
+	switch (id) {
+	case KVM_FEATURE_STEAL_TIME:
+		vcpu->arch.st.guest_addr = data;
+		vcpu->arch.st.last_steal = current->sched_info.run_delay;
+		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 /*
  * Hypercall emulation always returns to guest. Caller should check retval.
  */
@@ -762,6 +784,9 @@ static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
 		kvm_pv_send_ipi(vcpu);
 		ret = KVM_HCALL_STATUS_SUCCESS;
 		break;
+	case KVM_HCALL_FUNC_NOTIFY:
+		ret = kvm_save_notify(vcpu);
+		break;
 	default:
 		ret = KVM_HCALL_INVALID_CODE;
 		break;
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index f22d10228cd2..49c1172f0005 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -31,6 +31,115 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
+static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+	struct kvm_steal_time __user *st;
+	struct gfn_to_hva_cache *ghc;
+	struct kvm_memslots *slots;
+	gpa_t gpa;
+	u64 steal;
+	u32 version;
+
+	ghc = &vcpu->arch.st.cache;
+	gpa = vcpu->arch.st.guest_addr;
+	if (!(gpa & KVM_STEAL_PHYS_VALID))
+		return;
+
+	gpa &= KVM_STEAL_PHYS_MASK;
+	slots = kvm_memslots(vcpu->kvm);
+	if (slots->generation != ghc->generation || gpa != ghc->gpa) {
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa,
+					      sizeof(*st))) {
+			ghc->gpa = INVALID_GPA;
+			return;
+		}
+	}
+
+	st = (struct kvm_steal_time __user *)ghc->hva;
+	unsafe_get_user(version, &st->version, out);
+	if (version & 1)
+		version += 1;
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
+	/* Make sure st->version is written first */
+	smp_wmb();
+
+	unsafe_get_user(steal, &st->steal, out);
+	steal += current->sched_info.run_delay -
+		vcpu->arch.st.last_steal;
+	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	unsafe_put_user(steal, &st->steal, out);
+
+	/* Make sure st->steal is written first */
+	smp_wmb();
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
+out:
+	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+}
+
+static bool kvm_pvtime_supported(void)
+{
+	return !!sched_info_on();
+}
+
+static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
+					 struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	struct kvm *kvm = vcpu->kvm;
+	u64 gpa;
+	int ret = 0;
+	int idx;
+
+	if (!kvm_pvtime_supported() ||
+	    attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+		return -ENXIO;
+
+	if (get_user(gpa, user))
+		return -EFAULT;
+
+	/* Check the address is in a valid memslot */
+	idx = srcu_read_lock(&kvm->srcu);
+	if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT)))
+		ret = -EINVAL;
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	if (!ret)
+		vcpu->arch.st.guest_addr = gpa;
+
+	return ret;
+}
+
+static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
+					 struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	u64 gpa;
+
+	if (!kvm_pvtime_supported() ||
+	    attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+		return -ENXIO;
+
+	gpa = vcpu->arch.st.guest_addr;
+	if (put_user(gpa, user))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
+					 struct kvm_device_attr *attr)
+{
+	switch (attr->attr) {
+	case KVM_LOONGARCH_VCPU_PVTIME_GPA:
+		if (kvm_pvtime_supported())
+			return 0;
+	}
+
+	return -ENXIO;
+}
+
 /*
  * kvm_check_requests - check and handle pending vCPU requests
  *
@@ -48,6 +157,9 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu)
 	if (kvm_dirty_ring_check_request(vcpu))
 		return RESUME_HOST;
 
+	if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+		kvm_update_stolen_time(vcpu);
+
 	return RESUME_GUEST;
 }
 
@@ -672,6 +784,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_has_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -704,6 +819,9 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_get_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -726,6 +844,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_set_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -1084,6 +1205,7 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	/* Control guest page CCA attribute */
 	change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
+	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
 
 	/* Don't bother restoring registers multiple times unless necessary */
 	if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
-- 
Gitee

From 31290a45008c1ab6b0a33905bfc3dfff28a2d1da Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Fri, 22 Mar 2024 16:42:48 +0800
Subject: [PATCH 16/17] LoongArch: Add steal time support in guest side

Upstream: no

A percpu struct kvm_steal_time is added here; it is 64 bytes in size and
also aligned to 64 bytes, so that the whole structure stays within one
physical page.

When a vcpu is onlined, function pv_register_steal_time() is called. This
function passes the physical address of struct kvm_steal_time to the
hypervisor and tells it to enable steal time. When a vcpu is offlined, the
physical address is set to 0, telling the hypervisor to disable steal
time.
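For reference, once registration succeeds the kernel consumes the value
through the pv_steal_clock static call; the accessor in asm/paravirt.h
(whose body is elided in the hunk below) looks roughly like this sketch:

	static inline u64 paravirt_steal_clock(int cpu)
	{
		/* native_steal_clock() returns 0 until pv_time_init()
		 * retargets the static call to para_steal_clock() */
		return static_call(pv_steal_clock)(cpu);
	}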
Signed-off-by: Bibo Mao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/paravirt.h | 5 + arch/loongarch/kernel/paravirt.c | 130 ++++++++++++++++++++++++++ arch/loongarch/kernel/time.c | 2 + 3 files changed, 137 insertions(+) diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index 58f7b7b89f2c..fe27fb5e82b8 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -17,11 +17,16 @@ static inline u64 paravirt_steal_clock(int cpu) } int pv_ipi_init(void); +int __init pv_time_init(void); #else static inline int pv_ipi_init(void) { return 0; } +static inline int pv_time_init(void) +{ + return 0; +} #endif // CONFIG_PARAVIRT #endif diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 9044ed62045c..56182c64ab38 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -5,10 +5,13 @@ #include #include #include +#include #include struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +static int has_steal_clock; static u64 native_steal_clock(int cpu) { @@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu) DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 para_steal_clock(int cpu) +{ + u64 steal; + struct kvm_steal_time *src; + int version; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + /* Make sure that the version is read before the steal */ + virt_rmb(); + steal = src->steal; + /* Make sure that the steal is read before the next version */ + virt_rmb(); + + } while ((version & 1) || (version != src->version)); + return steal; +} + +static int pv_register_steal_time(void) +{ + int cpu = smp_processor_id(); + struct kvm_steal_time *st; + unsigned long addr; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + return 0; +} + #ifdef CONFIG_SMP static void pv_send_ipi_single(int cpu, unsigned int action) { @@ -110,6 +164,32 @@ static void pv_init_ipi(void) if (r < 0) panic("SWI0 IRQ request failed\n"); } + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); +} + +static int pv_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_register_steal_time(); + local_irq_restore(flags); + return 0; +} + +static int pv_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + return 0; +} #endif static bool kvm_para_available(void) @@ -149,3 +229,53 @@ int __init pv_ipi_init(void) return 1; } + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, + void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + 
.notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + if (pv_register_steal_time()) { + has_steal_clock = 0; + return 0; + } + + register_reboot_notifier(&pv_reboot_nb); + static_call_update(pv_steal_clock, para_steal_clock); + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + +#ifdef CONFIG_SMP + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "loongarch/pv:online", + pv_cpu_online, pv_cpu_down_prepare) < 0) + pr_err("Failed to install cpu hotplug callbacks\n"); +#endif + pr_info("Using stolen time PV\n"); + return 0; +} diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index fd5354f9be7c..46d7d40c87e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -15,6 +15,7 @@ #include #include +#include #include u64 cpu_clock_freq; @@ -214,4 +215,5 @@ void __init time_init(void) constant_clockevent_init(); constant_clocksource_init(); + pv_time_init(); } -- Gitee From a0bf5ad255e8b7bc7f8bc54ea84298319509b623 Mon Sep 17 00:00:00 2001 From: Song Gao Date: Thu, 28 Mar 2024 17:10:08 +0800 Subject: [PATCH 17/17] LoongArch: KVM: Add PMU support Upstream: no Add PMU device emulation Signed-off-by: Song Gao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_csr.h | 5 ++ arch/loongarch/include/asm/kvm_host.h | 14 ++++ arch/loongarch/include/asm/kvm_vcpu.h | 2 + arch/loongarch/include/asm/loongarch.h | 1 + arch/loongarch/kvm/exit.c | 7 ++ arch/loongarch/kvm/vcpu.c | 97 +++++++++++++++++++++++++- 6 files changed, 125 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h index 724ca8b7b401..476c9f620dd5 100644 --- a/arch/loongarch/include/asm/kvm_csr.h +++ b/arch/loongarch/include/asm/kvm_csr.h @@ -208,4 +208,9 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, csr->csrs[gid] |= val & _mask; } +#define KVM_PMU_PLV_ENABLE (CSR_PERFCTRL_PLV0 | \ + CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | \ + CSR_PERFCTRL_PLV3) + #endif /* __ASM_LOONGARCH_KVM_CSR_H__ */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 778874703483..c146d2ebdb90 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -129,6 +129,7 @@ enum emulation_result { #define KVM_LARCH_LASX (0x1 << 2) #define KVM_LARCH_SWCSR_LATEST (0x1 << 3) #define KVM_LARCH_HWCSR_USABLE (0x1 << 4) +#define KVM_LARCH_PERF (0x1 << 5) struct kvm_vcpu_arch { /* @@ -204,6 +205,9 @@ struct kvm_vcpu_arch { u64 last_steal; struct gfn_to_hva_cache cache; } st; + /* Save host pmu csr */ + u64 perf_ctrl[4]; + u64 perf_cntr[4]; }; static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg) @@ -231,6 +235,16 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) return arch->cpucfg[2] & CPUCFG2_LASX; } +static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[6] & CPUCFG6_PMP; +} + +static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) +{ + return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h index 9f53950959da..1da24994b838 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -75,6 +75,8 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif +int kvm_own_pmu(struct kvm_vcpu *vcpu); + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index bbfc3579bfcd..e852c0f62eb7 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -119,6 +119,7 @@ #define CPUCFG6_PMP BIT(0) #define CPUCFG6_PAMVER GENMASK(3, 1) #define CPUCFG6_PMNUM GENMASK(7, 4) +#define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) #define CPUCFG6_UPM BIT(14) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 13a9e58f463b..8affc6d4a66e 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -83,6 +83,13 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) rj = inst.reg2csr_format.rj; csrid = inst.reg2csr_format.csr; + if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= LOONGARCH_CSR_PERFCNTR3) { + if (!kvm_own_pmu(vcpu)) { + vcpu->arch.pc -= 4; + return EMULATE_DONE; + } + } + /* Process CSR ops */ switch (rj) { case 0: /* process csrrd */ diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 49c1172f0005..685f2826d022 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -544,6 +544,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) case LOONGARCH_CPUCFG5: *v = GENMASK(31, 0); return 0; + case LOONGARCH_CPUCFG6: + if (cpu_has_pmp) + *v = GENMASK(14, 0); + else + *v = 0; + return 0; case LOONGARCH_CPUCFG16: *v = GENMASK(16, 0); return 0; @@ -562,7 +568,7 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) static int kvm_check_cpucfg(int id, u64 val) { - int ret; + int ret, host; u64 mask = 0; ret = _kvm_get_cpucfg_mask(id, &mask); @@ -588,6 +594,18 @@ static int kvm_check_cpucfg(int id, u64 val) /* LASX architecturally implies LSX and FP but val does not satisfy that */ return -EINVAL; return 0; + case LOONGARCH_CPUCFG6: + if (val & CPUCFG6_PMP) { + host = read_cpucfg(6); + if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) + /* Guest pmbits must be the same with host */ + return -EINVAL; + if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM)) + return -EINVAL; + if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM)) + return -EINVAL; + } + return 0; default: /* * Values for the other CPUCFG IDs are not being further validated @@ -767,6 +785,7 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, { switch (attr->attr) { case 2: + case 6: return 0; default: return -ENXIO; @@ -1067,6 +1086,77 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } +int kvm_own_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + + if (!kvm_guest_has_pmu(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + + vcpu->arch.aux_inuse |= KVM_LARCH_PERF; + preempt_enable(); + return 0; +} + +static void kvm_lose_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) + return; + + /* save guest pmu csr */ + 
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL0, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL1, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL2, 0); + kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL3, 0); + /* Disable pmu access from guest */ + write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); + + if (((kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3)) + & KVM_PMU_PLV_ENABLE) == 0) + vcpu->arch.aux_inuse &= ~KVM_LARCH_PERF; +} + +static void kvm_restore_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) + return; + + /* Set PM0-PM(num) to Guest */ + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); +} + + int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { int intr = (int)irq->irq; @@ -1205,6 +1295,10 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + + /* Restore hardware perf csr */ + kvm_restore_pmu(vcpu); + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); /* Don't bother restoring registers multiple times unless necessary */ @@ -1290,6 +1384,7 @@ static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) struct loongarch_csrs *csr = vcpu->arch.csr; kvm_lose_fpu(vcpu); + kvm_lose_pmu(vcpu); /* * Update CSR state from hardware if software CSR state is stale, -- Gitee
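
For illustration, a VMM could expose the PMU to a guest roughly as follows
(a hypothetical userspace sketch, not part of this series; host_cpucfg6 is
assumed to hold the host's CPUCFG6 value and error handling is omitted):

	uint64_t val = host_cpucfg6;   /* PMBITS must match the host;
	                                  PMNUM must not exceed it */
	struct kvm_one_reg reg = {
		.id   = KVM_IOC_CPUCFG(6), /* LOONGARCH_CPUCFG6: PMU capabilities */
		.addr = (uint64_t)&val,
	};

	/* validated by kvm_check_cpucfg(); a guest whose CPUCFG6_PMP bit is
	   clear keeps getting -EINVAL from kvm_own_pmu() and never owns the
	   hardware counters */
	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);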