diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 35be0d82daa3921e249fa3f82ea14bf32010c15e..08dc6ddb6f8dbf2227ec46b9d5d79d702dcf8198 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2695,6 +2695,10 @@ [KVM,ARM] Allow use of GICv4 for direct injection of LPIs. + kvm-arm.dvmbm_enabled= + [KVM,ARM] Allow use of HiSilicon DVMBM capability. + Default: 0 + kvm_cma_resv_ratio=n [PPC] Reserves given percentage from system memory area for contiguous memory allocation for KVM hash pagetable diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index bdb023092d3b587ebe849f9b8e856794d0c431e6..b96a84543429a0c15cc29fc304d6c0f2c8dabf9d 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1787,3 +1787,4 @@ CONFIG_HISILICON_ERRATUM_162100803=y CONFIG_ARM64_HDBSS=y # end of ARMv9.5 architectural features CONFIG_VIRT_PLAT_DEV=y +CONFIG_KVM_HISI_VIRT=y diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ce9908fe0acf03cd755b4a83e2f88e8736233e2a..406e0444e304eb39ca54d5d83f0b323f08c47b29 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -51,6 +51,7 @@ #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7) +#define KVM_REQ_RELOAD_TLBI_DVMBM KVM_ARCH_REQ(8) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -313,6 +314,11 @@ struct kvm_arch { bool is_realm; struct realm realm; +#ifdef CONFIG_KVM_HISI_VIRT + spinlock_t sched_lock; + cpumask_var_t sched_cpus; /* Union of all vcpu's cpus_ptr */ + u64 tlbi_dvmbm; +#endif }; struct kvm_vcpu_fault_info { @@ -648,6 +654,10 @@ struct kvm_vcpu_arch { u64 br_el2; u64 prod_el2; } hdbss) +#ifdef CONFIG_KVM_HISI_VIRT + /* pCPUs this vCPU can be 
scheduled on. Pure copy of current->cpus_ptr */ + cpumask_var_t sched_cpus; + cpumask_var_t pre_sched_cpus; #endif }; @@ -1242,4 +1252,7 @@ extern unsigned int twedel; void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +extern bool kvm_ncsnp_support; +extern bool kvm_dvmbm_support; + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 5918769294edd70d6a07a0280633780ccee5c96f..4f7e9711a4afb3e54e0769731ba18230db3d75c5 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -218,7 +218,7 @@ static inline void __clean_dcache_guest_page(void *va, size_t size) * faulting in pages. Furthermore, FWB implies IDC, so cleaning to * PoU is not required either in this case. */ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + if (kvm_ncsnp_support || cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) return; kvm_flush_dcache_to_poc(va, size); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index d10d3fed31d9334662e429a5af28a1da2b217dd5..82957f93fa2186cb63cf80aa3cb3c41421f25320 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -111,6 +111,11 @@ KVM_NVHE_ALIAS(__hyp_rodata_end); /* pKVM static key */ KVM_NVHE_ALIAS(kvm_protected_mode_initialized); +#ifdef CONFIG_KVM_HISI_VIRT +/* Capability of non-cacheable snooping */ +KVM_NVHE_ALIAS(kvm_ncsnp_support); +#endif + #endif /* CONFIG_KVM */ #ifdef CONFIG_EFI_ZBOOT diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1a777715199fa82bc1edd96da155058f4a4c72df..99cda39ac86877cf76f438b91cee3f1063054152 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -5,6 +5,7 @@ source "virt/lib/Kconfig" source "virt/kvm/Kconfig" +source "arch/arm64/kvm/hisilicon/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 
9221291d99a6ef8cf98673e12f1c0a5b01220d84..d2ccd538e2c2c1c1c56fefc41cf8f25c698b6970 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -25,6 +25,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b58a7c46d9936c5fda01b90fbebdd6210276c3da..306f45b33e9e2e2a56870d172885002582c2b8dd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,6 +51,8 @@ static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; DEFINE_STATIC_KEY_FALSE(kvm_rme_is_available); +#include "hisilicon/hisi_virt.h" + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); @@ -60,6 +62,13 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); static bool vgic_present, kvm_arm_initialised; +/* Capability of non-cacheable snooping */ +bool kvm_ncsnp_support; + +/* Capability of DVMBM */ +bool kvm_dvmbm_support; + + static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized); bool is_kvm_arm_initialised(void) @@ -239,6 +248,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; + ret = kvm_sched_affinity_vm_init(kvm); + if (ret) + return ret; + mutex_init(&kvm->arch.config_lock); #ifdef CONFIG_LOCKDEP @@ -321,6 +334,8 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) */ void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_sched_affinity_vm_destroy(kvm); + bitmap_free(kvm->arch.pmu_filter); free_cpumask_var(kvm->arch.supported_cpus); @@ -533,6 +548,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) return err; + err = kvm_sched_affinity_vcpu_init(vcpu); + if (err) + return err; + err = kvm_share_hyp(vcpu, vcpu + 1); if (err) kvm_vgic_vcpu_destroy(vcpu); @@ -551,6 +570,8 @@ void kvm_arch_vcpu_destroy(struct 
kvm_vcpu *vcpu) kvm_pmu_vcpu_destroy(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_arm_vcpu_destroy(vcpu); + + kvm_sched_affinity_vcpu_destroy(vcpu); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) @@ -612,6 +633,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) vcpu_set_on_unsupported_cpu(vcpu); + + kvm_tlbi_dvmbm_vcpu_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -633,6 +656,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_arm_vmid_clear_active(); vcpu_clear_on_unsupported_cpu(vcpu); + + kvm_tlbi_dvmbm_vcpu_put(vcpu); } static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) @@ -1017,6 +1042,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return 0; + + if (kvm_check_request(KVM_REQ_RELOAD_TLBI_DVMBM, vcpu)) + kvm_hisi_reload_lsudvmbm(vcpu->kvm); } return 1; @@ -2754,6 +2782,15 @@ static __init int kvm_arm_init(void) return err; } + probe_hisi_cpu_type(); + kvm_ncsnp_support = hisi_ncsnp_supported(); + kvm_dvmbm_support = hisi_dvmbm_supported(); + kvm_info("KVM ncsnp %s\n", kvm_ncsnp_support ? "enabled" : "disabled"); + kvm_info("KVM dvmbm %s\n", kvm_dvmbm_support ? "enabled" : "disabled"); + + if (kvm_dvmbm_support) + kvm_get_pg_cfg(); + in_hyp_mode = is_kernel_in_hyp_mode(); if (in_hyp_mode) diff --git a/arch/arm64/kvm/hisilicon/Kconfig b/arch/arm64/kvm/hisilicon/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..6536f897a32eb374b7131d7b5550bed498a15796 --- /dev/null +++ b/arch/arm64/kvm/hisilicon/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +config KVM_HISI_VIRT + bool "HiSilicon SoC specific virtualization features" + depends on ARCH_HISI + help + Support for HiSilicon SoC specific virtualization features. + On non-HiSilicon platforms, say N here. 
diff --git a/arch/arm64/kvm/hisilicon/Makefile b/arch/arm64/kvm/hisilicon/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..849f99d1526d8540d68d19e15465a780f2c5187a
--- /dev/null
+++ b/arch/arm64/kvm/hisilicon/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_KVM_HISI_VIRT) += hisi_virt.o
diff --git a/arch/arm64/kvm/hisilicon/hisi_virt.c b/arch/arm64/kvm/hisilicon/hisi_virt.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf1e8fe3f4c4074af59f15f8330a99bf8a966c3b
--- /dev/null
+++ b/arch/arm64/kvm/hisilicon/hisi_virt.c
@@ -0,0 +1,604 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright(c) 2022 Huawei Technologies Co., Ltd
+ */
+
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/kvm_host.h>
+#include "hisi_virt.h"
+#include <asm/virt.h>
+
+static enum hisi_cpu_type cpu_type = UNKNOWN_HI_TYPE;
+
+static bool dvmbm_enabled;
+
+static const char * const hisi_cpu_type_str[] = {
+	"Hisi1612",
+	"Hisi1616",
+	"Hisi1620",
+	"HIP09",
+	"HIP10",
+	"HIP10C",
+	"HIP12",
+	"Unknown"
+};
+
+/* ACPI Hisi oem table id str */
+static const char * const oem_str[] = {
+	"HIP06 ",	/* Hisi 1612 */
+	"HIP07 ",	/* Hisi 1616 */
+	"HIP08 ",	/* Hisi 1620 */
+	"HIP09 ",	/* HIP09 */
+	"HIP10 ",	/* HIP10 */
+	"HIP10C ",	/* HIP10C */
+	"HIP12 "	/* HIP12 */
+};
+
+/*
+ * Probe Hisi CPU type from ACPI.
+ */ +static enum hisi_cpu_type acpi_get_hisi_cpu_type(void) +{ + struct acpi_table_header *table; + acpi_status status; + int i, str_size = ARRAY_SIZE(oem_str); + + /* Get oem table id from ACPI table header */ + status = acpi_get_table(ACPI_SIG_DSDT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn("Failed to get ACPI table: %s\n", + acpi_format_exception(status)); + return UNKNOWN_HI_TYPE; + } + + for (i = 0; i < str_size; ++i) { + if (!strncmp(oem_str[i], table->oem_table_id, 8)) + return i; + } + + return UNKNOWN_HI_TYPE; +} + +/* of Hisi cpu model str */ +static const char * const of_model_str[] = { + "Hi1612", + "Hi1616" +}; + +/* + * Probe Hisi CPU type from DT. + */ +static enum hisi_cpu_type of_get_hisi_cpu_type(void) +{ + const char *model; + int ret, i, str_size = ARRAY_SIZE(of_model_str); + + /* + * Note: There may not be a "model" node in FDT, which + * is provided by the vendor. In this case, we are not + * able to get CPU type information through this way. + */ + ret = of_property_read_string(of_root, "model", &model); + if (ret < 0) { + pr_warn("Failed to get Hisi cpu model by OF.\n"); + return UNKNOWN_HI_TYPE; + } + + for (i = 0; i < str_size; ++i) { + if (strstr(model, of_model_str[i])) + return i; + } + + return UNKNOWN_HI_TYPE; +} + +void probe_hisi_cpu_type(void) +{ + if (!acpi_disabled) + cpu_type = acpi_get_hisi_cpu_type(); + else + cpu_type = of_get_hisi_cpu_type(); + + kvm_info("detected: Hisi CPU type '%s'\n", hisi_cpu_type_str[cpu_type]); +} + +/* + * We have the fantastic HHA ncsnp capability on Kunpeng 920, + * with which hypervisor doesn't need to perform a lot of cache + * maintenance like before (in case the guest has non-cacheable + * Stage-1 mappings). 
+ */ +#define NCSNP_MMIO_BASE 0x20107E238 +bool hisi_ncsnp_supported(void) +{ + void __iomem *base; + unsigned int high; + bool supported = false; + + if (cpu_type != HI_1620) + return supported; + + base = ioremap(NCSNP_MMIO_BASE, 4); + if (!base) { + pr_warn("Unable to map MMIO region when probing ncsnp!\n"); + return supported; + } + + high = readl_relaxed(base) >> 28; + iounmap(base); + if (high != 0x1) + supported = true; + + return supported; +} + +static int __init early_dvmbm_enable(char *buf) +{ + return strtobool(buf, &dvmbm_enabled); +} +early_param("kvm-arm.dvmbm_enabled", early_dvmbm_enable); + +static void hardware_enable_dvmbm(void *data) +{ + u64 val; + + val = read_sysreg_s(SYS_LSUDVM_CTRL_EL2); + val |= LSUDVM_CTLR_EL2_MASK; + write_sysreg_s(val, SYS_LSUDVM_CTRL_EL2); +} + +static void hardware_disable_dvmbm(void *data) +{ + u64 val; + + val = read_sysreg_s(SYS_LSUDVM_CTRL_EL2); + val &= ~LSUDVM_CTLR_EL2_MASK; + write_sysreg_s(val, SYS_LSUDVM_CTRL_EL2); +} + +bool hisi_dvmbm_supported(void) +{ + if (cpu_type != HI_IP10 && cpu_type != HI_IP10C && + cpu_type != HI_IP12) + return false; + + if (!is_kernel_in_hyp_mode()) { + kvm_info("Hisi dvmbm not supported by KVM nVHE mode\n"); + return false; + } + + /* Determine whether DVMBM is supported by the hardware */ + if (!(read_sysreg(aidr_el1) & AIDR_EL1_DVMBM_MASK)) + return false; + + /* User provided kernel command-line parameter */ + if (!dvmbm_enabled) { + on_each_cpu(hardware_disable_dvmbm, NULL, 1); + return false; + } + + /* + * Enable TLBI Broadcast optimization by setting + * LSUDVM_CTRL_EL2's bit[0]. 
+ */ + on_each_cpu(hardware_enable_dvmbm, NULL, 1); + return true; +} + +int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return 0; + + if (!zalloc_cpumask_var(&vcpu->arch.sched_cpus, GFP_ATOMIC)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&vcpu->arch.pre_sched_cpus, GFP_ATOMIC)) { + free_cpumask_var(vcpu->arch.sched_cpus); + return -ENOMEM; + } + + return 0; +} + +void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return; + + free_cpumask_var(vcpu->arch.sched_cpus); + free_cpumask_var(vcpu->arch.pre_sched_cpus); +} + +static void __kvm_write_lsudvmbm(struct kvm *kvm) +{ + write_sysreg_s(kvm->arch.tlbi_dvmbm, SYS_LSUDVMBM_EL2); +} + +static void kvm_write_lsudvmbm(struct kvm *kvm) +{ + spin_lock(&kvm->arch.sched_lock); + __kvm_write_lsudvmbm(kvm); + spin_unlock(&kvm->arch.sched_lock); +} + +static int kvm_dvmbm_get_dies_info(struct kvm *kvm, u64 *vm_aff3s, int size) +{ + int num = 0, cpu; + + for_each_cpu(cpu, kvm->arch.sched_cpus) { + bool found = false; + u64 aff3; + int i; + + if (num >= size) + break; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + for (i = 0; i < num; i++) { + if (vm_aff3s[i] == aff3) { + found = true; + break; + } + } + + if (!found) + vm_aff3s[num++] = aff3; + } + + return num; +} + +static u32 socket_num, die_num; + +static u32 kvm_get_socket_num(void) +{ + int socket_id[MAX_PG_CFG_SOCKETS], cpu; + u32 num = 0; + + for_each_cpu(cpu, cpu_possible_mask) { + bool found = false; + u64 aff3, socket; + int i; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + /* aff3[7:3]: socket ID */ + socket = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + for (i = 0; i < num; i++) { + if (socket_id[i] == socket) { + found = true; + break; + } + } + if (!found) + socket_id[num++] = socket; + } + return num; +} + +static u32 kvm_get_die_num(void) +{ + int die_id[MAX_DIES_PER_SOCKET], cpu; + u32 num = 0; + + for_each_cpu(cpu, cpu_possible_mask) { + bool found = 
false; + u64 aff3, die; + int i; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + /* aff3[2:0]: die ID */ + die = aff3 & DIE_ID_MASK; + for (i = 0; i < num; i++) { + if (die_id[i] == die) { + found = true; + break; + } + } + if (!found) + die_id[num++] = die; + } + return num; +} + +static u32 g_die_pg[MAX_PG_CFG_SOCKETS * MAX_DIES_PER_SOCKET][MAX_CLUSTERS_PER_DIE]; + +static void kvm_get_die_pg(unsigned long pg_cfg, int socket_id, int die_id) +{ + u32 pg_num = 0, i, j; + u32 pg_flag[MAX_CLUSTERS_PER_DIE]; + u32 die_tmp = socket_id * die_num + die_id; + + for (i = 0; i < MAX_CLUSTERS_PER_DIE; i++) { + if (test_bit(i, &pg_cfg)) + pg_num++; + g_die_pg[die_tmp][i] = i; + pg_flag[i] = 0; + } + + for (i = 0; i < MAX_CLUSTERS_PER_DIE - pg_num; i++) { + if (test_bit(i, &pg_cfg)) { + for (j = 0; j < pg_num; j++) { + u32 cluster_bak = MAX_CLUSTERS_PER_DIE - pg_num + j; + + if (!test_bit(cluster_bak, &pg_cfg) && + !pg_flag[cluster_bak]) { + pg_flag[cluster_bak] = 1; + g_die_pg[die_tmp][i] = cluster_bak; + g_die_pg[die_tmp][cluster_bak] = i; + break; + } + } + } + } +} + +static void kvm_update_vm_lsudvmbm(struct kvm *kvm) +{ + u64 mpidr, aff3, aff2, aff1, phy_aff2; + u64 vm_aff3s[DVMBM_MAX_DIES]; + u64 val; + int cpu, nr_dies; + u32 socket_id, die_id; + + nr_dies = kvm_dvmbm_get_dies_info(kvm, vm_aff3s, DVMBM_MAX_DIES); + if (nr_dies > 2) { + val = DVMBM_RANGE_ALL_DIES << DVMBM_RANGE_SHIFT; + goto out_update; + } + + if (nr_dies == 1) { + val = DVMBM_RANGE_ONE_DIE << DVMBM_RANGE_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_SHIFT; + + /* fulfill bits [52:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + aff1 = MPIDR_AFFINITY_LEVEL(mpidr, 1); + socket_id = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + die_id = (aff3 & DIE_ID_MASK) >> DIE_ID_SHIFT; + if (die_id == TOTEM_B_ID) + die_id = 0; + else + die_id = 1; + + phy_aff2 = g_die_pg[socket_id * die_num + 
die_id][aff2]; + val |= 1ULL << (phy_aff2 * 4 + aff1); + } + + goto out_update; + } + + /* nr_dies == 2 */ + val = DVMBM_RANGE_TWO_DIES << DVMBM_RANGE_SHIFT | + DVMBM_GRAN_CLUSTER << DVMBM_GRAN_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_SHIFT | + vm_aff3s[1] << DVMBM_DIE2_SHIFT; + + /* and fulfill bits [43:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + socket_id = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + die_id = (aff3 & DIE_ID_MASK) >> DIE_ID_SHIFT; + if (die_id == TOTEM_B_ID) + die_id = 0; + else + die_id = 1; + + if (aff3 == vm_aff3s[0]) { + phy_aff2 = g_die_pg[socket_id * die_num + die_id][aff2]; + val |= 1ULL << (phy_aff2 + DVMBM_DIE1_CLUSTER_SHIFT); + } else { + phy_aff2 = g_die_pg[socket_id * die_num + die_id][aff2]; + val |= 1ULL << (phy_aff2 + DVMBM_DIE2_CLUSTER_SHIFT); + } + } + +out_update: + kvm->arch.tlbi_dvmbm = val; +} + +static u64 convert_aff3_to_die_hip12(u64 aff3) +{ + /* + * On HIP12, we use 4 bits to represent a die in SYS_LSUDVMBM_EL2. + * + * die1: socket ID (bits[60:59]) + die ID (bits[58:57]) + * die2: socket ID (bits[56:55]) + die ID (bits[54:53]) + * + * We therefore need to properly encode Aff3 into it. 
+ */ + return FIELD_GET(MPIDR_AFF3_SOCKET_ID_MASK, aff3) << 2 | + FIELD_GET(MPIDR_AFF3_DIE_ID_MASK, aff3); +} + +static void kvm_update_vm_lsudvmbm_hip12(struct kvm *kvm) +{ + u64 mpidr, aff3, aff2; + u64 vm_aff3s[DVMBM_MAX_DIES_HIP12]; + u64 val; + int cpu, nr_dies; + u64 die1, die2; + + nr_dies = kvm_dvmbm_get_dies_info(kvm, vm_aff3s, DVMBM_MAX_DIES_HIP12); + if (nr_dies > 2) { + val = DVMBM_RANGE_ALL_DIES << DVMBM_RANGE_SHIFT; + goto out_update; + } + + if (nr_dies == 1) { + die1 = convert_aff3_to_die_hip12(vm_aff3s[0]); + val = DVMBM_RANGE_ONE_DIE << DVMBM_RANGE_SHIFT | + die1 << DVMBM_DIE1_SHIFT_HIP12; + + /* fulfill bits [11:6] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + + val |= 1ULL << (aff2 + DVMBM_DIE1_CLUSTER_SHIFT_HIP12); + } + + goto out_update; + } + + /* nr_dies == 2 */ + die1 = convert_aff3_to_die_hip12(vm_aff3s[0]); + die2 = convert_aff3_to_die_hip12(vm_aff3s[1]); + val = DVMBM_RANGE_TWO_DIES << DVMBM_RANGE_SHIFT | + DVMBM_GRAN_CLUSTER << DVMBM_GRAN_SHIFT | + die1 << DVMBM_DIE1_SHIFT_HIP12 | + die2 << DVMBM_DIE2_SHIFT_HIP12; + + /* and fulfill bits [11:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + + if (aff3 == vm_aff3s[0]) + val |= 1ULL << (aff2 + DVMBM_DIE1_CLUSTER_SHIFT_HIP12); + else + val |= 1ULL << (aff2 + DVMBM_DIE2_CLUSTER_SHIFT_HIP12); + } + +out_update: + kvm->arch.tlbi_dvmbm = val; +} + +void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + cpumask_t mask; + unsigned long i; + + /* Don't bother on old hardware */ + if (!kvm_dvmbm_support) + return; + + cpumask_copy(vcpu->arch.sched_cpus, current->cpus_ptr); + + if (likely(cpumask_equal(vcpu->arch.sched_cpus, + vcpu->arch.pre_sched_cpus))) { + kvm_write_lsudvmbm(kvm); + return; + } + + /* Re-calculate sched_cpus for this VM */ + 
spin_lock(&kvm->arch.sched_lock); + + cpumask_clear(&mask); + kvm_for_each_vcpu(i, tmp, kvm) { + /* + * We may get the stale sched_cpus if another thread + * is concurrently changing its affinity. It'll + * eventually go through vcpu_load() and we rely on + * the last sched_lock holder to make things correct. + */ + cpumask_or(&mask, &mask, tmp->arch.sched_cpus); + } + + if (cpumask_equal(kvm->arch.sched_cpus, &mask)) + goto out_unlock; + + cpumask_copy(kvm->arch.sched_cpus, &mask); + + kvm_flush_remote_tlbs(kvm); + + /* + * Re-calculate LSUDVMBM_EL2 for this VM and kick all vcpus + * out to reload the LSUDVMBM configuration. + */ + if (cpu_type == HI_IP12) + kvm_update_vm_lsudvmbm_hip12(kvm); + else + kvm_update_vm_lsudvmbm(kvm); + kvm_make_all_cpus_request(kvm, KVM_REQ_RELOAD_TLBI_DVMBM); + +out_unlock: + __kvm_write_lsudvmbm(kvm); + spin_unlock(&kvm->arch.sched_lock); +} + +void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return; + + cpumask_copy(vcpu->arch.pre_sched_cpus, vcpu->arch.sched_cpus); +} + +void kvm_get_pg_cfg(void) +{ + void __iomem *mn_base; + u32 i, j; + u32 pg_cfgs[MAX_PG_CFG_SOCKETS * MAX_DIES_PER_SOCKET]; + u64 mn_phy_base; + u32 val; + + if (cpu_type == HI_IP12) + return; + + socket_num = kvm_get_socket_num(); + die_num = kvm_get_die_num(); + + for (i = 0; i < socket_num; i++) { + for (j = 0; j < die_num; j++) { + + /* + * totem B means the first CPU DIE within a SOCKET, + * totem A means the second one. + */ + mn_phy_base = (j == 0) ? 
TB_MN_BASE : TA_MN_BASE; + mn_phy_base += CHIP_ADDR_OFFSET(i); + mn_phy_base += MN_ECO0_OFFSET; + + mn_base = ioremap(mn_phy_base, 4); + if (!mn_base) { + kvm_info("MN base addr ioremap failed\n"); + return; + } + val = readl_relaxed(mn_base); + pg_cfgs[j + i * die_num] = val & 0xff; + kvm_get_die_pg(pg_cfgs[j + i * die_num], i, j); + iounmap(mn_base); + } + } +} + +int kvm_sched_affinity_vm_init(struct kvm *kvm) +{ + if (!kvm_dvmbm_support) + return 0; + + spin_lock_init(&kvm->arch.sched_lock); + if (!zalloc_cpumask_var(&kvm->arch.sched_cpus, GFP_ATOMIC)) + return -ENOMEM; + + return 0; +} + +void kvm_sched_affinity_vm_destroy(struct kvm *kvm) +{ + if (!kvm_dvmbm_support) + return; + + free_cpumask_var(kvm->arch.sched_cpus); +} + +void kvm_hisi_reload_lsudvmbm(struct kvm *kvm) +{ + if (WARN_ON_ONCE(!kvm_dvmbm_support)) + return; + + preempt_disable(); + kvm_write_lsudvmbm(kvm); + preempt_enable(); +} diff --git a/arch/arm64/kvm/hisilicon/hisi_virt.h b/arch/arm64/kvm/hisilicon/hisi_virt.h new file mode 100644 index 0000000000000000000000000000000000000000..c57ca65970de3e5543cdb19e4852777a44999490 --- /dev/null +++ b/arch/arm64/kvm/hisilicon/hisi_virt.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright(c) 2022 Huawei Technologies Co., Ltd + */ + +#ifndef __HISI_VIRT_H__ +#define __HISI_VIRT_H__ + +#ifdef CONFIG_KVM_HISI_VIRT +enum hisi_cpu_type { + HI_1612, + HI_1616, + HI_1620, + HI_IP09, + HI_IP10, + HI_IP10C, + HI_IP12, + UNKNOWN_HI_TYPE +}; + +/* HIP10 */ +#define AIDR_EL1_DVMBM_MASK GENMASK_ULL(13, 12) +#define SYS_LSUDVM_CTRL_EL2 sys_reg(3, 4, 15, 7, 4) +#define LSUDVM_CTLR_EL2_MASK BIT_ULL(0) + +#define MAX_CLUSTERS_PER_DIE 8 +#define TB_MN_BASE 0x00C6067f0000 +#define TA_MN_BASE 0x0046067F0000 +#define CHIP_ADDR_OFFSET(_chip) (((((_chip) >> 3) & 0x1) * 0x80000000000) + \ + ((((_chip) >> 2) & 0x1) * (0x100000000000)) + \ + (((_chip) & 0x3) * 0x200000000000)) +#define MAX_PG_CFG_SOCKETS 4 +#define MAX_DIES_PER_SOCKET 2 
+#define MN_ECO0_OFFSET 0xc00 +#define SOCKET_ID_MASK 0xf8 +#define SOCKET_ID_SHIFT 3 +#define DIE_ID_MASK 0x7 +#define DIE_ID_SHIFT 0 +#define TOTEM_B_ID 3 + +/* + * MPIDR_EL1 layout on HIP10/HIP10C + * + * Aff3[7:3] - socket ID [0-15] + * Aff3[2:0] - die ID [1,3] + * Aff2 - cluster ID [0-9] + * Aff1 - core ID [0-3] + * Aff0 - thread ID [0,1] + */ + +#define SYS_LSUDVMBM_EL2 sys_reg(3, 4, 15, 7, 5) +#define DVMBM_RANGE_SHIFT 62 +#define DVMBM_RANGE_ONE_DIE 0ULL +#define DVMBM_RANGE_TWO_DIES 1ULL +#define DVMBM_RANGE_ALL_DIES 3ULL + +#define DVMBM_GRAN_SHIFT 61 +#define DVMBM_GRAN_CLUSTER 0ULL +#define DVMBM_GRAN_DIE 1ULL + +#define DVMBM_DIE1_SHIFT 53 +#define DVMBM_DIE2_SHIFT 45 +#define DVMBM_DIE1_CLUSTER_SHIFT 22 +#define DVMBM_DIE2_CLUSTER_SHIFT 0 + +#define DVMBM_MAX_DIES 32 + +/* + * MPIDR_EL1 layout on HIP12 + * + * Aff3[4:3] - socket ID [0-3] + * Aff3[2:0] - die ID [0,1] + * Aff2[2:0] - cluster ID [0-5] + * Aff1[3:0] - core ID [0-15] + * Aff0[0] - thread ID [0,1] + * + * On HIP12, cpu die is named as vdie. Actually, + * vdie is equivalent to cpu die. Here use die + * to describe vdie. 
+ */ + +#define MPIDR_AFF3_SOCKET_ID_MASK GENMASK(4, 3) +#define MPIDR_AFF3_DIE_ID_MASK GENMASK(2, 0) +#define DVMBM_DIE1_SHIFT_HIP12 57 +#define DVMBM_DIE2_SHIFT_HIP12 53 +#define DVMBM_DIE1_CLUSTER_SHIFT_HIP12 6 +#define DVMBM_DIE2_CLUSTER_SHIFT_HIP12 0 +#define DVMBM_MAX_DIES_HIP12 8 + +void probe_hisi_cpu_type(void); +bool hisi_ncsnp_supported(void); +bool hisi_dvmbm_supported(void); +void kvm_get_pg_cfg(void); + +int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu); +int kvm_sched_affinity_vm_init(struct kvm *kvm); +void kvm_sched_affinity_vm_destroy(struct kvm *kvm); +void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu); +void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu); +void kvm_hisi_reload_lsudvmbm(struct kvm *kvm); +#else +static inline void probe_hisi_cpu_type(void) {} +static inline bool hisi_ncsnp_supported(void) +{ + return false; +} +static inline bool hisi_dvmbm_supported(void) +{ + return false; +} +static inline void kvm_get_pg_cfg(void) {} + +static inline int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} +static inline void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu) {} +static inline int kvm_sched_affinity_vm_init(struct kvm *kvm) +{ + return 0; +} +static inline void kvm_sched_affinity_vm_destroy(struct kvm *kvm) {} +static inline void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu) {} +static inline void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_hisi_reload_lsudvmbm(struct kvm *kvm) {} +#endif /* CONFIG_KVM_HISI_VIRT */ + +#endif /* __HISI_VIRT_H__ */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c624e6dd54df2ad4a2b4b8f645a6f80bbe752868..0ebe8ccae5a8c46cc2aee5c339ce9186410691b4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1361,7 +1361,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) .arg = pgt, }; - if 
(stage2_has_fwb(pgt)) + if (kvm_ncsnp_support || stage2_has_fwb(pgt)) return 0; return kvm_pgtable_walk(pgt, addr, size, &walker);