From 4b380022f7e2593ac1915102b68af3a66e4ac9bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Jan 2022 04:35:12 -0800 Subject: [PATCH 01/37] x86/fpu: Extend fpu_xstate_prctl() with guest permissions mainline inclusion from mainline-v5.17-rc1 commit 980fe2fddcff21937c93532b4597c8ea450346c1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 980fe2fddcff x86/fpu: Extend fpu_xstate_prctl() with guest permissions. -------------------------------- KVM requires a clear separation of host user space and guest permissions for dynamic XSTATE components. Add a guest permissions member to struct fpu and a separate set of prctl() arguments: ARCH_GET_XCOMP_GUEST_PERM and ARCH_REQ_XCOMP_GUEST_PERM. The semantics are equivalent to the host user space permission control except for the following constraints: 1) Permissions have to be requested before the first vCPU is created 2) Permissions are frozen when the first vCPU is created to ensure consistency. Any attempt to expand permissions via the prctl() after that point is rejected. Signed-off-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-2-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 2 ++ arch/x86/include/asm/fpu/types.h | 9 ++++++ arch/x86/include/uapi/asm/prctl.h | 26 ++++++++------- arch/x86/kernel/fpu/core.c | 3 ++ arch/x86/kernel/fpu/xstate.c | 53 +++++++++++++++++++++++-------- arch/x86/kernel/fpu/xstate.h | 13 ++++++-- arch/x86/kernel/process.c | 2 ++ 7 files changed, 80 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index d6e8933faafe..b4c99b229891 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -113,6 +113,8 @@ static inline void fpstate_free(struct fpu *fpu) { } /* fpstate-related functions which are exported to KVM */ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); +extern inline u64 xstate_get_guest_group_perm(void); + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 408a6dbe1d61..c03f6777c580 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -389,6 +389,8 @@ struct fpstate { /* @regs is dynamically sized! Don't add anything after @regs! */ } __aligned(64); +#define FPU_GUEST_PERM_LOCKED BIT_ULL(63) + struct fpu_state_perm { /* * @__state_perm: @@ -478,6 +480,13 @@ struct fpu { */ KABI_EXTEND(struct fpu_state_perm perm) + /* + * @guest_perm: + * + * Permission related information for guest pseudo FPUs + */ + struct fpu_state_perm guest_perm; + /* * @__fpstate: * diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 754a07856817..500b96e71f18 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -2,20 +2,22 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_GET_XCOMP_SUPP 0x1021 -#define ARCH_GET_XCOMP_PERM 0x1022 -#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1db281e8ef21..aba24b52e050 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -434,6 +434,8 @@ void fpstate_reset(struct fpu *fpu) fpu->perm.__state_perm = fpu_kernel_cfg.default_features; fpu->perm.__state_size = fpu_kernel_cfg.default_size; fpu->perm.__user_state_size = fpu_user_cfg.default_size; + /* Same defaults for guests */ + fpu->guest_perm = fpu->perm; } static inline void fpu_inherit_perms(struct fpu *dst_fpu) @@ -444,6 +446,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu) spin_lock_irq(¤t->sighand->siglock); /* Fork also inherits the permissions of the parent */ dst_fpu->perm = src_fpu->perm; + dst_fpu->guest_perm = src_fpu->guest_perm; spin_unlock_irq(¤t->sighand->siglock); } } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b66bb37f2bcf..70290089c506 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1595,7 +1595,7 @@ static int validate_sigaltstack(unsigned int usize) return 0; } -static int __xstate_request_perm(u64 permitted, u64 requested) +static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) { /* * This deliberately does not exclude !XSAVES as we still might @@ -1605,9 +1605,10 @@ static int __xstate_request_perm(u64 permitted, u64 requested) */ bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); struct fpu *fpu = ¤t->group_leader->thread.fpu; + struct fpu_state_perm *perm; unsigned int ksize, usize; u64 mask; - int ret; + int ret = 0; /* Check whether fully enabled */ if ((permitted & requested) == requested) @@ -1621,15 +1622,18 @@ static int __xstate_request_perm(u64 permitted, u64 requested) mask &= XFEATURE_MASK_USER_SUPPORTED; usize = xstate_calculate_size(mask, false); - ret = validate_sigaltstack(usize); - if (ret) - return ret; + if (!guest) { + ret = validate_sigaltstack(usize); + if (ret) + return ret; + } + perm = guest ? &fpu->guest_perm : &fpu->perm; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ - WRITE_ONCE(fpu->perm.__state_perm, mask); + WRITE_ONCE(perm->__state_perm, mask); /* Protected by sighand lock */ - fpu->perm.__state_size = ksize; - fpu->perm.__user_state_size = usize; + perm->__state_size = ksize; + perm->__user_state_size = usize; return ret; } @@ -1640,7 +1644,7 @@ static const u64 xstate_prctl_req[XFEATURE_MAX] = { [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA, }; -static int xstate_request_perm(unsigned long idx) +static int xstate_request_perm(unsigned long idx, bool guest) { u64 permitted, requested; int ret; @@ -1661,14 +1665,19 @@ static int xstate_request_perm(unsigned long idx) return -EOPNOTSUPP; /* Lockless quick check */ - permitted = xstate_get_host_group_perm(); + permitted = xstate_get_group_perm(guest); if ((permitted & requested) == requested) return 0; /* Protect against concurrent modifications */ spin_lock_irq(¤t->sighand->siglock); - permitted = xstate_get_host_group_perm(); - ret = __xstate_request_perm(permitted, requested); + permitted = xstate_get_group_perm(guest); + + /* First vCPU allocation locks the permissions. */ + if (guest && (permitted & FPU_GUEST_PERM_LOCKED)) + ret = -EBUSY; + else + ret = __xstate_request_perm(permitted, requested, guest); spin_unlock_irq(¤t->sighand->siglock); return ret; } @@ -1713,12 +1722,18 @@ int xfd_enable_feature(u64 xfd_err) return 0; } #else /* CONFIG_X86_64 */ -static inline int xstate_request_perm(unsigned long idx) +static inline int xstate_request_perm(unsigned long idx, bool guest) { return -EPERM; } #endif /* !CONFIG_X86_64 */ +inline u64 xstate_get_guest_group_perm(void) +{ + return xstate_get_group_perm(true); +} +EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); + /** * fpu_xstate_prctl - xstate permission operations * @tsk: Redundant pointer to current @@ -1742,6 +1757,7 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2) u64 __user *uptr = (u64 __user *)arg2; u64 permitted, supported; unsigned long idx = arg2; + bool guest = false; if (tsk != current) return -EPERM; @@ -1760,11 +1776,20 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2) permitted &= XFEATURE_MASK_USER_SUPPORTED; return put_user(permitted, uptr); + case ARCH_GET_XCOMP_GUEST_PERM: + permitted = xstate_get_guest_group_perm(); + permitted &= XFEATURE_MASK_USER_SUPPORTED; + return put_user(permitted, uptr); + + case ARCH_REQ_XCOMP_GUEST_PERM: + guest = true; + fallthrough; + case ARCH_REQ_XCOMP_PERM: if (!IS_ENABLED(CONFIG_X86_64)) return -EOPNOTSUPP; - return xstate_request_perm(idx); + return xstate_request_perm(idx, guest); default: return -EINVAL; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 842accdc6c34..09c960f83256 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -20,10 +20,19 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } -static inline u64 xstate_get_host_group_perm(void) +static inline u64 xstate_get_group_perm(bool guest) { + struct fpu *fpu = ¤t->group_leader->thread.fpu; + struct fpu_state_perm *perm; + /* Pairs with WRITE_ONCE() in xstate_request_perm() */ - return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm); + perm = guest ? &fpu->guest_perm : &fpu->perm; + return READ_ONCE(perm->__state_perm); +} + +static inline u64 xstate_get_host_group_perm(void) +{ + return xstate_get_group_perm(false); } enum xstate_copy_mode { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bcfd333abf3d..386eb15a93f2 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1003,6 +1003,8 @@ long do_arch_prctl_common(struct task_struct *task, int option, case ARCH_GET_XCOMP_SUPP: case ARCH_GET_XCOMP_PERM: case ARCH_REQ_XCOMP_PERM: + case ARCH_GET_XCOMP_GUEST_PERM: + case ARCH_REQ_XCOMP_GUEST_PERM: return fpu_xstate_prctl(task, option, arg2); } -- Gitee From a7fdca8daa904f13e92d3ec8f30f8b067afdc271 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Jan 2022 04:35:13 -0800 Subject: [PATCH 02/37] x86/fpu: Prepare guest FPU for dynamically enabled FPU features mainline inclusion from mainline-v5.17-rc1 commit 36487e6228c4cb04257c92266a04078a384bc4ec category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 36487e6228c4 x86/fpu: Prepare guest FPU for dynamically enabled FPU features. -------------------------------- To support dynamically enabled FPU features for guests prepare the guest pseudo FPU container to keep track of the currently enabled xfeatures and the guest permissions. Signed-off-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-3-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 13 +++++++++++++ arch/x86/kernel/fpu/core.c | 26 +++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c03f6777c580..06019b78148a 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -507,6 +507,19 @@ struct fpu { * Guest pseudo FPU container */ struct fpu_guest { + /* + * @xfeatures: xfeature bitmap of features which are + * currently enabled for the guest vCPU. + */ + u64 xfeatures; + + /* + * @perm: xfeature bitmap of features which are + * permitted to be enabled for the guest + * vCPU. + */ + u64 perm; + /* * @fpstate: Pointer to the allocated guest fpstate */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index aba24b52e050..8c654fab6e23 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -185,6 +185,26 @@ void fpu_reset_from_exception_fixup(void) #if IS_ENABLED(CONFIG_KVM) static void __fpstate_reset(struct fpstate *fpstate); +static void fpu_init_guest_permissions(struct fpu_guest *gfpu) +{ + struct fpu_state_perm *fpuperm; + u64 perm; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + spin_lock_irq(¤t->sighand->siglock); + fpuperm = ¤t->group_leader->thread.fpu.guest_perm; + perm = fpuperm->__state_perm; + + /* First fpstate allocation locks down permissions. */ + WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED); + + spin_unlock_irq(¤t->sighand->siglock); + + gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED; +} + bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) { struct fpstate *fpstate; @@ -200,7 +220,11 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) fpstate->is_valloc = true; fpstate->is_guest = true; - gfpu->fpstate = fpstate; + gfpu->fpstate = fpstate; + gfpu->xfeatures = fpu_user_cfg.default_features; + gfpu->perm = fpu_user_cfg.default_features; + fpu_init_guest_permissions(gfpu); + return true; } EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); -- Gitee From 52cb316780b4d4f09af285d0e4ba1cd208dcbacf Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:14 -0800 Subject: [PATCH 03/37] kvm: x86: Fix xstate_required_size() to follow XSTATE alignment rule mainline inclusion from mainline-v5.17-rc1 commit cc04b6a21d431359eceeec0d812b492088b04af5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit cc04b6a21d43 kvm: x86: Fix xstate_required_size() to follow XSTATE alignment rule. -------------------------------- CPUID.0xD.1.EBX enumerates the size of the XSAVE area (in compacted format) required by XSAVES. If CPUID.0xD.i.ECX[1] is set for a state component (i), this state component should be located on the next 64-bytes boundary following the preceding state component in the compacted layout. Fix xstate_required_size() to follow the alignment rule. AMX is the first state component with 64-bytes alignment to catch this bug. Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-4-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/cpuid.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4638672db180..7505daaa83bf 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -42,7 +42,11 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) if (xstate_bv & 0x1) { u32 eax, ebx, ecx, edx, offset; cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); - offset = compacted ? ret : ebx; + /* ECX[1]: 64B alignment in compacted form */ + if (compacted) + offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret; + else + offset = ebx; ret = max(ret, offset + eax); } -- Gitee From 2556e7fc5e8bc1240f8ef697b04d3f4842b4e4db Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:15 -0800 Subject: [PATCH 04/37] kvm: x86: Exclude unpermitted xfeatures at KVM_GET_SUPPORTED_CPUID mainline inclusion from mainline-v5.17-rc1 commit 445ecdf79be0c71ca248f7611aeefceaea3ec59f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 445ecdf79be0 kvm: x86: Exclude unpermitted xfeatures at KVM_GET_SUPPORTED_CPUID. -------------------------------- KVM_GET_SUPPORTED_CPUID should not include any dynamic xstates in CPUID[0xD] if they have not been requested with prctl. Otherwise a process which directly passes KVM_GET_SUPPORTED_CPUID to KVM_SET_CPUID2 would now fail even if it doesn't intend to use a dynamically enabled feature. Userspace must know that prctl is required and allocate >4K xstate buffer before setting any dynamic bit. Suggested-by: Paolo Bonzini Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-5-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- Documentation/virt/kvm/api.rst | 4 ++++ arch/x86/kvm/cpuid.c | 9 ++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a1575e9532ec..dc2da951208a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1632,6 +1632,10 @@ userspace capabilities, and with user requirements (for example, the user may wish to constrain cpuid to emulate older hardware, or for feature consistency across a cluster). +Dynamically-enabled feature bits need to be requested with +``arch_prctl()`` before calling this ioctl. Feature bits that have not +been requested are excluded from the result. + Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may expose cpuid features (e.g. MONITOR) which are not supported by kvm in its default configuration. If userspace enables such capabilities, it diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7505daaa83bf..2ccfaee42fc2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -765,11 +765,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; } break; - case 0xd: - entry->eax &= supported_xcr0; + case 0xd: { + u64 guest_perm = xstate_get_guest_group_perm(); + + entry->eax &= supported_xcr0 & guest_perm; entry->ebx = xstate_required_size(supported_xcr0, false); entry->ecx = entry->ebx; - entry->edx &= supported_xcr0 >> 32; + entry->edx &= (supported_xcr0 & guest_perm) >> 32; if (!supported_xcr0) break; @@ -816,6 +818,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; } break; + } case 0x12: /* Intel SGX */ if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) { -- Gitee From 35a446b1d50d1c89dde5f8261c5f534c6dd2296b Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:16 -0800 Subject: [PATCH 05/37] x86/fpu: Make XFD initialization in __fpstate_reset() a function argument mainline inclusion from mainline-v5.17-rc1 commit b0237dad2d7f8820b5b415291431d8259e787470 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit b0237dad2d7f x86/fpu: Make XFD initialization in __fpstate_reset() a function argument. -------------------------------- vCPU threads are different from native tasks regarding to the initial XFD value. While all native tasks follow a fixed value (init_fpstate::xfd) established by the FPU core at boot, vCPU threads need to obey the reset value (i.e. ZERO) defined by the specification, to meet the expectation of the guest. Let the caller supply an argument and adjust the host and guest related invocations accordingly. Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Signed-off-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-6-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8c654fab6e23..b9a53a8dc847 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -183,7 +183,7 @@ void fpu_reset_from_exception_fixup(void) } #if IS_ENABLED(CONFIG_KVM) -static void __fpstate_reset(struct fpstate *fpstate); +static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); static void fpu_init_guest_permissions(struct fpu_guest *gfpu) { @@ -215,7 +215,8 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) if (!fpstate) return false; - __fpstate_reset(fpstate); + /* Leave xfd to 0 (the reset value defined by spec) */ + __fpstate_reset(fpstate, 0); fpstate_init_user(fpstate); fpstate->is_valloc = true; fpstate->is_guest = true; @@ -438,21 +439,21 @@ void fpstate_init_user(struct fpstate *fpstate) fpstate_init_fstate(fpstate); } -static void __fpstate_reset(struct fpstate *fpstate) +static void __fpstate_reset(struct fpstate *fpstate, u64 xfd) { /* Initialize sizes and feature masks */ fpstate->size = fpu_kernel_cfg.default_size; fpstate->user_size = fpu_user_cfg.default_size; fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; - fpstate->xfd = init_fpstate.xfd; + fpstate->xfd = xfd; } void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; - __fpstate_reset(fpu->fpstate); + __fpstate_reset(fpu->fpstate, init_fpstate.xfd); /* Initialize the permission related info in fpu */ fpu->perm.__state_perm = fpu_kernel_cfg.default_features; -- Gitee From aa95677d1285ce13e595d5573e9efe7b92507f90 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Jan 2022 04:35:17 -0800 Subject: [PATCH 06/37] x86/fpu: Add guest support to xfd_enable_feature() mainline inclusion from mainline-v5.17-rc1 commit c270ce393dfd700e7510a4579568deeefba954fd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit c270ce393dfd x86/fpu: Add guest support to xfd_enable_feature(). -------------------------------- Guest support for dynamically enabled FPU features requires a few modifications to the enablement function which is currently invoked from the #NM handler: 1) Use guest permissions and sizes for the update 2) Update fpu_guest state accordingly 3) Take into account that the enabling can be triggered either from a running guest via XSETBV and MSR_IA32_XFD write emulation or from a guest restore. In the latter case the guests fpstate is not the current tasks active fpstate. Split the function and implement the guest mechanics throughout the callchain. Signed-off-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-7-yang.zhong@intel.com> [Add 32-bit stub for __xfd_enable_feature. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kernel/fpu/xstate.c | 93 +++++++++++++++++++++--------------- arch/x86/kernel/fpu/xstate.h | 6 +++ 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 70290089c506..9883ede45dc9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1499,29 +1499,6 @@ void fpstate_free(struct fpu *fpu) vfree(fpu->fpstate); } -/** - * fpu_install_fpstate - Update the active fpstate in the FPU - * - * @fpu: A struct fpu * pointer - * @newfps: A struct fpstate * pointer - * - * Returns: A null pointer if the last active fpstate is the embedded - * one or the new fpstate is already installed; - * otherwise, a pointer to the old fpstate which has to - * be freed by the caller. - */ -static struct fpstate *fpu_install_fpstate(struct fpu *fpu, - struct fpstate *newfps) -{ - struct fpstate *oldfps = fpu->fpstate; - - if (fpu->fpstate == newfps) - return NULL; - - fpu->fpstate = newfps; - return oldfps != &fpu->__fpstate ? oldfps : NULL; -} - /** * fpstate_realloc - Reallocate struct fpstate for the requested new features * @@ -1529,6 +1506,7 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu, * of that task * @ksize: The required size for the kernel buffer * @usize: The required size for user space buffers + * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations * * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer * terminates quickly, vfree()-induced IPIs may be a concern, but tasks @@ -1537,13 +1515,13 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu, * Returns: 0 on success, -ENOMEM on allocation error. */ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, - unsigned int usize) + unsigned int usize, struct fpu_guest *guest_fpu) { struct fpu *fpu = ¤t->thread.fpu; struct fpstate *curfps, *newfps = NULL; unsigned int fpsize; + bool in_use; - curfps = fpu->fpstate; fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64); newfps = vzalloc(fpsize); @@ -1553,28 +1531,55 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, newfps->user_size = usize; newfps->is_valloc = true; + /* + * When a guest FPU is supplied, use @guest_fpu->fpstate + * as reference independent whether it is in use or not. + */ + curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate; + + /* Determine whether @curfps is the active fpstate */ + in_use = fpu->fpstate == curfps; + + if (guest_fpu) { + newfps->is_guest = true; + newfps->is_confidential = curfps->is_confidential; + newfps->in_use = curfps->in_use; + guest_fpu->xfeatures |= xfeatures; + } + fpregs_lock(); /* - * Ensure that the current state is in the registers before - * swapping fpstate as that might invalidate it due to layout - * changes. + * If @curfps is in use, ensure that the current state is in the + * registers before swapping fpstate as that might invalidate it + * due to layout changes. */ - if (test_thread_flag(TIF_NEED_FPU_LOAD)) + if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; - curfps = fpu_install_fpstate(fpu, newfps); - /* Do the final updates within the locked region */ xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); - xfd_update_state(newfps); + if (guest_fpu) { + guest_fpu->fpstate = newfps; + /* If curfps is active, update the FPU fpstate pointer */ + if (in_use) + fpu->fpstate = newfps; + } else { + fpu->fpstate = newfps; + } + + if (in_use) + xfd_update_state(fpu->fpstate); fpregs_unlock(); - vfree(curfps); + /* Only free valloc'ed state */ + if (curfps && curfps->is_valloc) + vfree(curfps); + return 0; } @@ -1682,14 +1687,16 @@ static int xstate_request_perm(unsigned long idx, bool guest) return ret; } -int xfd_enable_feature(u64 xfd_err) +int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu) { u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; + struct fpu_state_perm *perm; unsigned int ksize, usize; struct fpu *fpu; if (!xfd_event) { - pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); + if (!guest_fpu) + pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); return 0; } @@ -1697,14 +1704,16 @@ int xfd_enable_feature(u64 xfd_err) spin_lock_irq(¤t->sighand->siglock); /* If not permitted let it die */ - if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) { + if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) { spin_unlock_irq(¤t->sighand->siglock); return -EPERM; } fpu = ¤t->group_leader->thread.fpu; - ksize = fpu->perm.__state_size; - usize = fpu->perm.__user_state_size; + perm = guest_fpu ? &fpu->guest_perm : &fpu->perm; + ksize = perm->__state_size; + usize = perm->__user_state_size; + /* * The feature is permitted. State size is sufficient. Dropping * the lock is safe here even if more features are added from @@ -1717,10 +1726,16 @@ int xfd_enable_feature(u64 xfd_err) * Try to allocate a new fpstate. If that fails there is no way * out. */ - if (fpstate_realloc(xfd_event, ksize, usize)) + if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu)) return -EFAULT; return 0; } + +int xfd_enable_feature(u64 xfd_err) +{ + return __xfd_enable_feature(xfd_err, NULL); +} + #else /* CONFIG_X86_64 */ static inline int xstate_request_perm(unsigned long idx, bool guest) { diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 09c960f83256..8707ba0f1a8b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -162,8 +162,14 @@ static inline void xfd_update_state(struct fpstate *fpstate) } } } + +extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else static inline void xfd_update_state(struct fpstate *fpstate) { } + +static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { + return -EPERM; +} #endif /* -- Gitee From 172dc435581242edcd3e30d4e150fa8aa444e609 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jan 2022 04:35:18 -0800 Subject: [PATCH 07/37] x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM mainline inclusion from mainline-v5.17-rc1 commit 0781d60f658e25fbad3b6e4261f54eb1cd3dc302 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 0781d60f658e x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM. -------------------------------- Provide a wrapper for expanding the guest fpstate buffer according to requested xfeatures. KVM wants to call this wrapper to manage any dynamic xstate used by the guest. Suggested-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Reviewed-by: Paolo Bonzini Message-Id: <20220105123532.12586-8-yang.zhong@intel.com> [Remove unnecessary 32-bit check. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 1 + arch/x86/kernel/fpu/core.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b4c99b229891..60c40466ee22 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -119,6 +119,7 @@ extern inline u64 xstate_get_guest_group_perm(void); extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); +extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b9a53a8dc847..0f799e0c88c9 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -245,6 +245,28 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu) } EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); +/* + * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable + * @guest_fpu: Pointer to the guest FPU container + * @xfeatures: Features requested by guest CPUID + * + * Enable all dynamic xfeatures according to guest perm and requested CPUID. + * + * Return: 0 on success, error code otherwise + */ +int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) +{ + lockdep_assert_preemption_enabled(); + + /* Nothing to do if all requested features are already enabled. */ + xfeatures &= ~guest_fpu->xfeatures; + if (!xfeatures) + return 0; + + return __xfd_enable_feature(xfeatures, guest_fpu); +} +EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); + int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { struct fpstate *guest_fps = guest_fpu->fpstate; -- Gitee From d63690fc43580eb87cf9cf5c82bc5ba960b56b38 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:19 -0800 Subject: [PATCH 08/37] kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2 mainline inclusion from mainline-v5.17-rc1 commit 5ab2f45bba4894a0db4af8567da3efd6228dd010 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 5ab2f45bba48 kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2. -------------------------------- KVM can request fpstate expansion in two approaches: 1) When intercepting guest updates to XCR0 and XFD MSR; 2) Before vcpu runs (e.g. at KVM_SET_CPUID2); The first option doesn't waste memory for legacy guest if it doesn't support XFD. However doing so introduces more complexity and also imposes an order requirement in the restoring path, i.e. XCR0/XFD must be restored before XSTATE. Given that the agreement is to do the static approach. This is considered a better tradeoff though it does waste 8K memory for legacy guest if its CPUID includes dynamically-enabled xfeatures. Successful fpstate expansion requires userspace VMM to acquire guest xstate permissions before calling KVM_SET_CPUID2. Also take the chance to adjust the indent in kvm_set_cpuid(). Signed-off-by: Jing Liu Signed-off-by: Sean Christopherson Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-9-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/cpuid.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2ccfaee42fc2..7113572f3e8c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -77,9 +77,12 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find( return NULL; } -static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) +static int kvm_check_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid_entry2 *entries, + int nent) { struct kvm_cpuid_entry2 *best; + u64 xfeatures; /* * The existing code assumes virtual address is 48-bit or 57-bit in the @@ -93,7 +96,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return -EINVAL; } - return 0; + /* + * Exposing dynamic xfeatures to the guest requires additional + * enabling in the FPU, e.g. to expand the guest XSAVE state size. + */ + best = cpuid_entry2_find(entries, nent, 0xd, 0); + if (!best) + return 0; + + xfeatures = best->eax | ((u64)best->edx << 32); + xfeatures &= XFEATURE_MASK_USER_DYNAMIC; + if (!xfeatures) + return 0; + + return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures); } void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -279,7 +295,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, e2[i].padding[2] = 0; } - r = kvm_check_cpuid(e2, cpuid->nent); + r = kvm_check_cpuid(vcpu, e2, cpuid->nent); if (r) { kvfree(e2); goto out_free_cpuid; @@ -315,7 +331,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, return PTR_ERR(e2); } - r = kvm_check_cpuid(e2, cpuid->nent); + r = kvm_check_cpuid(vcpu, e2, cpuid->nent); if (r) { kvfree(e2); return r; -- Gitee From 90ac802857dcdb3f05dafb6d5102a5adf73d4aad Mon Sep 17 00:00:00 2001 From: Kevin Tian Date: Wed, 5 Jan 2022 04:35:20 -0800 Subject: [PATCH 09/37] x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation mainline inclusion from mainline-v5.17-rc1 commit 8eb9a48ac1e86a8a59f7123b529d6e498fb1f163 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 8eb9a48ac1e8 x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation. -------------------------------- Guest XFD can be updated either in the emulation path or in the restore path. Provide a wrapper to update guest_fpu::fpstate::xfd. If the guest fpstate is currently in-use, also update the per-cpu xfd cache and the actual MSR. Signed-off-by: Kevin Tian Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-10-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 6 ++++++ arch/x86/kernel/fpu/core.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 60c40466ee22..f7c947b09acb 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -121,6 +121,12 @@ extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); +#ifdef CONFIG_X86_64 +extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +#else +static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +#endif + extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0f799e0c88c9..5dc3580dc20e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -267,6 +267,18 @@ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) } EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); +#ifdef CONFIG_X86_64 +void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) +{ + fpregs_lock(); + guest_fpu->fpstate->xfd = xfd; + if (guest_fpu->fpstate->in_use) + xfd_update_state(guest_fpu->fpstate); + fpregs_unlock(); +} +EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); +#endif /* CONFIG_X86_64 */ + int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { struct fpstate *guest_fps = guest_fpu->fpstate; -- Gitee From 5910de3245e502ec58bdd90b79f6d69c19cec196 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:21 -0800 Subject: [PATCH 10/37] kvm: x86: Add emulation for IA32_XFD mainline inclusion from mainline-v5.17-rc1 commit 820a6ee944e74e57255ac2e90916ecdaade57b95 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 820a6ee944e7 kvm: x86: Add emulation for IA32_XFD. -------------------------------- Intel's eXtended Feature Disable (XFD) feature allows the software to dynamically adjust fpstate buffer size for XSAVE features which have large state. Because guest fpstate has been expanded for all possible dynamic xstates at KVM_SET_CPUID2, emulation of the IA32_XFD MSR is straightforward. For write just call fpu_update_guest_xfd() to update the guest fpu container once all the sanity checks are passed. For read simply return the cached value in the container. Signed-off-by: Jing Liu Signed-off-by: Zeng Guang Signed-off-by: Wei Wang Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-11-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a204099d2e51..125181fc1a12 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1322,6 +1322,7 @@ static const u32 msrs_to_save_all[] = { MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + MSR_IA32_XFD, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; @@ -3415,6 +3416,19 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.msr_misc_features_enables = data; break; +#ifdef CONFIG_X86_64 + case MSR_IA32_XFD: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + return 1; + + if (data & ~(XFEATURE_MASK_USER_DYNAMIC & + vcpu->arch.guest_supported_xcr0)) + return 1; + + fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data); + break; +#endif default: if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); @@ -3724,6 +3738,15 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K7_HWCR: msr_info->data = vcpu->arch.msr_hwcr; break; +#ifdef CONFIG_X86_64 + case MSR_IA32_XFD: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + return 1; + + msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd; + break; +#endif default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); @@ -5923,6 +5946,10 @@ static void kvm_init_msr_list(void) min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) continue; break; + case MSR_IA32_XFD: + if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) + continue; + break; default: break; } -- Gitee From 36c666e6095d4a77b27a4be87d89e305a08dee26 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:22 -0800 Subject: [PATCH 11/37] x86/fpu: Prepare xfd_err in struct fpu_guest mainline inclusion from mainline-v5.17-rc1 commit 1df4fd834e8e2c00973ac2003ad0e6feb8750b31 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 1df4fd834e8e x86/fpu: Prepare xfd_err in struct fpu_guest. -------------------------------- When XFD causes an instruction to generate #NM, IA32_XFD_ERR contains information about which disabled state components are being accessed. The #NM handler is expected to check this information and then enable the state components by clearing IA32_XFD for the faulting task (if having permission). If the XFD_ERR value generated in guest is consumed/clobbered by the host before the guest itself doing so, it may lead to non-XFD-related #NM treated as XFD #NM in host (due to non-zero value in XFD_ERR), or XFD-related #NM treated as non-XFD #NM in guest (XFD_ERR cleared by the host #NM handler). Introduce a new field in fpu_guest to save the guest xfd_err value. KVM is expected to save guest xfd_err before interrupt is enabled and restore it right before entering the guest (with interrupt disabled). Signed-off-by: Jing Liu Signed-off-by: Kevin Tian Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-12-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 06019b78148a..8d6abf4f52b8 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -520,6 +520,11 @@ struct fpu_guest { */ u64 perm; + /* + * @xfd_err: Save the guest value. + */ + u64 xfd_err; + /* * @fpstate: Pointer to the allocated guest fpstate */ -- Gitee From 4a642360d7658b4d585381f3f7f0b84e66483a4f Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:23 -0800 Subject: [PATCH 12/37] kvm: x86: Intercept #NM for saving IA32_XFD_ERR mainline inclusion from mainline-v5.17-rc1 commit ec5be88ab29fd9145c7ced20b58fb96f7c6b6890 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit ec5be88ab29f kvm: x86: Intercept #NM for saving IA32_XFD_ERR. -------------------------------- Guest IA32_XFD_ERR is generally modified in two places: - Set by CPU when #NM is triggered; - Cleared by guest in its #NM handler; Intercept #NM for the first case when a nonzero value is written to IA32_XFD. Nonzero indicates that the guest is willing to do dynamic fpstate expansion for certain xfeatures, thus KVM needs to manage and virtualize guest XFD_ERR properly. The vcpu exception bitmap is updated in XFD write emulation according to guest_fpu::xfd. Save the current XFD_ERR value to the guest_fpu container in the #NM VM-exit handler. This must be done with interrupt disabled, otherwise the unsaved MSR value may be clobbered by host activity. The saving operation is conducted conditionally only when guest_fpu:xfd includes a non-zero value. Doing so also avoids misread on a platform which doesn't support XFD but #NM is triggered due to L1 interception. Queueing #NM to the guest is postponed to handle_exception_nmi(). This goes through the nested_vmx check so a virtual vmexit is queued instead when #NM is triggered in L2 but L1 wants to intercept it. Restore the host value (always ZERO outside of the host #NM handler) before enabling interrupt. Restore the guest value from the guest_fpu container right before entering the guest (with interrupt disabled). Suggested-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-13-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/vmx/vmcs.h | 5 +++++ arch/x86/kvm/vmx/vmx.c | 48 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 59 insertions(+) diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 995a51635467..a32685f75b6c 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -133,6 +133,11 @@ static inline bool is_machine_check(u32 intr_info) return is_exception_n(intr_info, MC_VECTOR); } +static inline bool is_nm_fault(u32 intr_info) +{ + return is_exception_n(intr_info, NM_VECTOR); +} + /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c521474b9675..0ccaf7cf5077 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -922,6 +923,13 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask); } + /* + * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR + * can be saved timely. + */ + if (vcpu->arch.guest_fpu.fpstate->xfd) + eb |= (1u << NM_VECTOR); + vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -2130,6 +2138,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KERNEL_GS_BASE: vmx_write_guest_kernel_gs_base(vmx, data); break; + case MSR_IA32_XFD: + ret = kvm_set_msr_common(vcpu, msr_info); + /* Update #NM interception according to guest xfd */ + if (!ret) + update_exception_bitmap(vcpu); + break; #endif case MSR_IA32_SYSENTER_CS: if (is_guest_mode(vcpu)) @@ -5076,6 +5090,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (is_machine_check(intr_info) || is_nmi(intr_info)) return 1; /* handled by handle_exception_nmi_irqoff() */ + /* + * Queue the exception here instead of in handle_nm_fault_irqoff(). + * This ensures the nested_vmx check is not skipped so vmexit can + * be reflected to L1 (when it intercepts #NM) before reaching this + * point. + */ + if (is_nm_fault(intr_info)) { + kvm_queue_exception(vcpu, NM_VECTOR); + return 1; + } + if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); @@ -6759,6 +6784,26 @@ static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, kvm_after_interrupt(vcpu); } +static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu) +{ + /* + * Save xfd_err to guest_fpu before interrupt is enabled, so the + * MSR value is not clobbered by the host activity before the guest + * has chance to consume it. + * + * Do not blindly read xfd_err here, since this exception might + * be caused by L1 interception on a platform which doesn't + * support xfd at all. + * + * Do it conditionally upon guest_fpu::xfd. xfd_err matters + * only when xfd contains a non-zero value. + * + * Queuing exception is done in vmx_handle_exit. See comment there. + */ + if (vcpu->arch.guest_fpu.fpstate->xfd) + rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); +} + static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) { const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist; @@ -6767,6 +6812,9 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) /* if exit due to PF check for async PF */ if (is_page_fault(intr_info)) vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); + /* if exit due to NM, handle before interrupts are enabled */ + else if (is_nm_fault(intr_info)) + handle_nm_fault_irqoff(&vmx->vcpu); /* Handle machine checks before interrupts are enabled */ else if (is_machine_check(intr_info)) kvm_machine_check(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 125181fc1a12..01a9bd73ac6b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9185,6 +9185,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_return(); + if (vcpu->arch.guest_fpu.xfd_err) + wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); + if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); set_debugreg(vcpu->arch.eff_db[0], 0); @@ -9231,6 +9234,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.handle_exit_irqoff(vcpu); + if (vcpu->arch.guest_fpu.xfd_err) + wrmsrl(MSR_IA32_XFD_ERR, 0); + /* * Consume any pending interrupts, including the possible source of * VM-Exit on SVM and any ticks that occur between VM-Exit and now. -- Gitee From 074c18766b555a36232944236896074ce79af7de Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:24 -0800 Subject: [PATCH 13/37] kvm: x86: Emulate IA32_XFD_ERR for guest mainline inclusion from mainline-v5.17-rc1 commit 548e83650a51dce0d188b9e41b1e2ca5d63597cf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 548e83650a51 kvm: x86: Emulate IA32_XFD_ERR for guest. -------------------------------- Emulate read/write to IA32_XFD_ERR MSR. Only the saved value in the guest_fpu container is touched in the emulation handler. Actual MSR update is handled right before entering the guest (with preemption disabled) Signed-off-by: Jing Liu Signed-off-by: Zeng Guang Signed-off-by: Wei Wang Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-14-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 01a9bd73ac6b..05fbcd9c672e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1322,7 +1322,7 @@ static const u32 msrs_to_save_all[] = { MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, - MSR_IA32_XFD, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; @@ -3428,6 +3428,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data); break; + case MSR_IA32_XFD_ERR: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + return 1; + + if (data & ~(XFEATURE_MASK_USER_DYNAMIC & + vcpu->arch.guest_supported_xcr0)) + return 1; + + vcpu->arch.guest_fpu.xfd_err = data; + break; #endif default: if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) @@ -3746,6 +3757,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd; break; + case MSR_IA32_XFD_ERR: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + return 1; + + msr_info->data = vcpu->arch.guest_fpu.xfd_err; + break; #endif default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) @@ -5947,6 +5965,7 @@ static void kvm_init_msr_list(void) continue; break; case MSR_IA32_XFD: + case MSR_IA32_XFD_ERR: if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) continue; break; -- Gitee From 8f85b372e5e1c9ff46f2853c3d16d95da1d778b4 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:25 -0800 Subject: [PATCH 14/37] kvm: x86: Disable RDMSR interception of IA32_XFD_ERR mainline inclusion from mainline-v5.17-rc1 commit 61f208134a871047f1d642ed3b813f4f71e30b0e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 61f208134a87 kvm: x86: Disable RDMSR interception of IA32_XFD_ERR. -------------------------------- This saves one unnecessary VM-exit in guest #NM handler, given that the MSR is already restored with the guest value before the guest is resumed. Suggested-by: Paolo Bonzini Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-15-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0ccaf7cf5077..c47b635804fe 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -166,6 +166,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = { MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE, + MSR_IA32_XFD_ERR, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, @@ -7738,6 +7739,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) } } + if (kvm_cpu_cap_has(X86_FEATURE_XFD)) + vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R, + !guest_cpuid_has(vcpu, X86_FEATURE_XFD)); + + set_cr4_guest_host_mask(vmx); vmx_write_encls_bitmap(vcpu, NULL); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 030dbd5c5a1f..cf23008b5029 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -336,7 +336,7 @@ struct vcpu_vmx { struct lbr_desc lbr_desc; /* Save desired MSR intercept (read: pass-through) state */ -#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13 +#define MAX_POSSIBLE_PASSTHROUGH_MSRS 14 struct { DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); -- Gitee From 05370b7c7e58a16ff3def9dc1a4d322327fa05f5 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:26 -0800 Subject: [PATCH 15/37] kvm: x86: Add XCR0 support for Intel AMX mainline inclusion from mainline-v5.17-rc1 commit 86aff7a4799286635efd94dab17b513544703cad category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 86aff7a47992 kvm: x86: Add XCR0 support for Intel AMX. -------------------------------- Two XCR0 bits are defined for AMX to support XSAVE mechanism. Bit 17 is for tilecfg and bit 18 is for tiledata. The value of XCR0[17:18] is always either 00b or 11b. Also, SDM recommends that only 64-bit operating systems enable Intel AMX by setting XCR0[18:17]. 32-bit host kernel never sets the tile bits in vcpu->arch.guest_supported_xcr0. Signed-off-by: Jing Liu Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-16-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05fbcd9c672e..0ad99649597d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -199,7 +199,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \ - | XFEATURE_MASK_PKRU) + | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE) u64 __read_mostly host_efer; EXPORT_SYMBOL_GPL(host_efer); @@ -1020,6 +1020,11 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } + + if ((xcr0 & XFEATURE_MASK_XTILE) && + ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE)) + return 1; + vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) -- Gitee From 2e6f9d2bc5faf4491c639ce4a41a1c790d81e368 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 5 Jan 2022 04:35:27 -0800 Subject: [PATCH 16/37] kvm: x86: Add CPUID support for Intel AMX mainline inclusion from mainline-v5.17-rc1 commit 690a757d610e50c2c3acd2e4bc3992cfc63feff2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 690a757d610e kvm: x86: Add CPUID support for Intel AMX. -------------------------------- Extend CPUID emulation to support XFD, AMX_TILE, AMX_INT8 and AMX_BF16. Adding those bits into kvm_cpu_caps finally activates all previous logics in this series. Hide XFD on 32bit host kernels. Otherwise it leads to a weird situation where KVM tells userspace to migrate MSR_IA32_XFD and then rejects attempts to read/write the MSR. Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-17-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/cpuid.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7113572f3e8c..7916e66e9d88 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -408,9 +408,11 @@ void kvm_set_cpu_caps(void) #ifdef CONFIG_X86_64 unsigned int f_gbpages = F(GBPAGES); unsigned int f_lm = F(LM); + unsigned int f_xfd = F(XFD); #else unsigned int f_gbpages = 0; unsigned int f_lm = 0; + unsigned int f_xfd = 0; #endif memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); @@ -478,7 +480,8 @@ void kvm_set_cpu_caps(void) F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) + F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) | + F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ @@ -497,7 +500,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, - F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) + F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); kvm_cpu_cap_init_scattered(CPUID_12_EAX, @@ -603,6 +606,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, case 0x14: case 0x17: case 0x18: + case 0x1d: + case 0x1e: case 0x1f: case 0x8000001d: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -879,6 +884,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; } break; + /* Intel AMX TILE */ + case 0x1d: + if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + + for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) { + if (!do_host_cpuid(array, function, i)) + goto out; + } + break; + case 0x1e: /* TMUL information */ + if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + break; case KVM_CPUID_SIGNATURE: { static const char signature[12] = "KVMKVMKVM\0\0"; const u32 *sigptr = (const u32 *)signature; -- Gitee From 8462c730ca92c26c591a2588d09caa6a267c08fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Jan 2022 04:35:28 -0800 Subject: [PATCH 17/37] x86/fpu: Add uabi_size to guest_fpu mainline inclusion from mainline-v5.17-rc1 commit c60427dd50ba9b20063ccaed0e98d62e886d7a3b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit c60427dd50ba x86/fpu: Add uabi_size to guest_fpu. -------------------------------- Userspace needs to inquire KVM about the buffer size to work with the new KVM_SET_XSAVE and KVM_GET_XSAVE2. Add the size info to guest_fpu for KVM to access. Signed-off-by: Thomas Gleixner Signed-off-by: Wei Wang Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-18-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 5 +++++ arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/fpu/xstate.c | 1 + 3 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 8d6abf4f52b8..dde4bd43c7cc 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -525,6 +525,11 @@ struct fpu_guest { */ u64 xfd_err; + /* + * @uabi_size: Size required for save/restore + */ + unsigned int uabi_size; + /* * @fpstate: Pointer to the allocated guest fpstate */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 5dc3580dc20e..0c2b60a703cc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -224,6 +224,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) gfpu->fpstate = fpstate; gfpu->xfeatures = fpu_user_cfg.default_features; gfpu->perm = fpu_user_cfg.default_features; + gfpu->uabi_size = fpu_user_cfg.default_size; fpu_init_guest_permissions(gfpu); return true; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9883ede45dc9..52c765860e09 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1545,6 +1545,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, newfps->is_confidential = curfps->is_confidential; newfps->in_use = curfps->in_use; guest_fpu->xfeatures |= xfeatures; + guest_fpu->uabi_size = usize; } fpregs_lock(); -- Gitee From 0d9e57e18b51230062cced49feebde68042f2796 Mon Sep 17 00:00:00 2001 From: Guang Zeng Date: Wed, 5 Jan 2022 04:35:29 -0800 Subject: [PATCH 18/37] kvm: x86: Add support for getting/setting expanded xstate buffer mainline inclusion from mainline-v5.17-rc1 commit be50b2065dfa3d88428fdfdc340d154d96bf6848 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit be50b2065dfa kvm: x86: Add support for getting/setting expanded xstate buffer. -------------------------------- With KVM_CAP_XSAVE, userspace uses a hardcoded 4KB buffer to get/set xstate data from/to KVM. This doesn't work when dynamic xfeatures (e.g. AMX) are exposed to the guest as they require a larger buffer size. Introduce a new capability (KVM_CAP_XSAVE2). Userspace VMM gets the required xstate buffer size via KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2). KVM_SET_XSAVE is extended to work with both legacy and new capabilities by doing properly-sized memdup_user() based on the guest fpu container. KVM_GET_XSAVE is kept for backward-compatible reason. Instead, KVM_GET_XSAVE2 is introduced under KVM_CAP_XSAVE2 as the preferred interface for getting xstate buffer (4KB or larger size) from KVM (Link: https://lkml.org/lkml/2021/12/15/510) Also, update the api doc with the new KVM_GET_XSAVE2 ioctl. Signed-off-by: Guang Zeng Signed-off-by: Wei Wang Signed-off-by: Jing Liu Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-19-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- Documentation/virt/kvm/api.rst | 41 ++++++++++++++++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 16 +++++++++++- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/cpuid.h | 2 ++ arch/x86/kvm/x86.c | 45 ++++++++++++++++++++++++++++++++- include/uapi/linux/kvm.h | 4 +++ 6 files changed, 105 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index dc2da951208a..9846cc4f5389 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1514,6 +1514,7 @@ is vcpu 0. struct kvm_xsave { __u32 region[1024]; + __u32 extra[0]; }; This ioctl would copy current vcpu's xsave struct to the userspace. @@ -1522,7 +1523,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace. 4.43 KVM_SET_XSAVE ------------------ -:Capability: KVM_CAP_XSAVE +:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2 :Architectures: x86 :Type: vcpu ioctl :Parameters: struct kvm_xsave (in) @@ -1533,9 +1534,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace. struct kvm_xsave { __u32 region[1024]; + __u32 extra[0]; }; -This ioctl would copy userspace's xsave struct to the kernel. +This ioctl would copy userspace's xsave struct to the kernel. It copies +as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2), +when invoked on the vm file descriptor. The size value returned by +KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096. +Currently, it is only greater than 4096 if a dynamic feature has been +enabled with ``arch_prctl()``, but this may change in the future. + +The offsets of the state save areas in struct kvm_xsave follow the +contents of CPUID leaf 0xD on the host. 4.44 KVM_GET_XCRS @@ -4983,6 +4993,33 @@ KVM does guarantee that vCPUs will see either the previous filter or the new filter, e.g. MSRs with identical settings in both the old and new filter will have deterministic behavior. +4.134 KVM_GET_XSAVE2 +-------------------- + +:Capability: KVM_CAP_XSAVE2 +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_xsave (out) +:Returns: 0 on success, -1 on error + + +:: + + struct kvm_xsave { + __u32 region[1024]; + __u32 extra[0]; + }; + +This ioctl would copy current vcpu's xsave struct to the userspace. It +copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) +when invoked on the vm file descriptor. The size value returned by +KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096. +Currently, it is only greater than 4096 if a dynamic feature has been +enabled with ``arch_prctl()``, but this may change in the future. + +The offsets of the state save areas in struct kvm_xsave follow the contents +of CPUID leaf 0xD on the host. + 5. The kvm_run structure ======================== diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0e0694a0a4f4..1552d6024e21 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -362,9 +362,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[0]; }; #define KVM_MAX_XCRS 16 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7916e66e9d88..e72a6f78faf6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -32,7 +32,7 @@ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_GPL(kvm_cpu_caps); -static u32 xstate_required_size(u64 xstate_bv, bool compacted) +u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9fbc0de7c337..63d96ec61cd3 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -47,6 +47,8 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); +u32 xstate_required_size(u64 xstate_bv, bool compacted); + int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ad99649597d..82ab557915c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3991,6 +3991,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = 0; break; + case KVM_CAP_XSAVE2: { + u64 guest_perm = xstate_get_guest_group_perm(); + + r = xstate_required_size(supported_xcr0 & guest_perm, false); + if (r < sizeof(struct kvm_xsave)) + r = sizeof(struct kvm_xsave); + break; + } case KVM_CAP_X86_NOTIFY_VMEXIT: r = kvm_has_notify_vmexit; break; @@ -4634,6 +4642,16 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, vcpu->arch.pkru); } +static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) +{ + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) + return; + + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, + state, size, vcpu->arch.pkru); +} + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { @@ -4953,6 +4971,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_GET_XSAVE: { + r = -EINVAL; + if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave)) + break; + u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT); r = -ENOMEM; if (!u.xsave) @@ -4967,7 +4989,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_XSAVE: { - u.xsave = memdup_user(argp, sizeof(*u.xsave)); + int size = vcpu->arch.guest_fpu.uabi_size; + + u.xsave = memdup_user(argp, size); if (IS_ERR(u.xsave)) { r = PTR_ERR(u.xsave); goto out_nofree; @@ -4976,6 +5000,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); break; } + + case KVM_GET_XSAVE2: { + int size = vcpu->arch.guest_fpu.uabi_size; + + u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT); + r = -ENOMEM; + if (!u.xsave) + break; + + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + + r = -EFAULT; + if (copy_to_user(argp, u.xsave, size)) + break; + + r = 0; + break; + } + case KVM_GET_XCRS: { u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT); r = -ENOMEM; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e1c3ef841eb8..0656ef70caf9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1070,6 +1070,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_XSAVE2 208 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 #define KVM_CAP_X86_NOTIFY_VMEXIT 219 @@ -1558,6 +1559,9 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; -- Gitee From e2b78c666e7b13fda2b92fbb29ed2f0b6b3e5e11 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 5 Jan 2022 04:35:30 -0800 Subject: [PATCH 19/37] kvm: selftests: Add support for KVM_CAP_XSAVE2 mainline inclusion from mainline-v5.17-rc1 commit 415a3c33e847349c0f76575b3ebfdfae2f5a681a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 415a3c33e847 kvm: selftests: Add support for KVM_CAP_XSAVE2. -------------------------------- When KVM_CAP_XSAVE2 is supported, userspace is expected to allocate buffer for KVM_GET_XSAVE2 and KVM_SET_XSAVE using the size returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2). Signed-off-by: Wei Wang Signed-off-by: Guang Zeng Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-20-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- tools/arch/x86/include/uapi/asm/kvm.h | 16 ++++- tools/include/uapi/linux/kvm.h | 3 + .../testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/include/x86_64/processor.h | 10 +++ tools/testing/selftests/kvm/lib/kvm_util.c | 12 ++-- .../selftests/kvm/lib/x86_64/processor.c | 72 ++++++++++++++++++- .../testing/selftests/kvm/x86_64/evmcs_test.c | 2 +- tools/testing/selftests/kvm/x86_64/smm_test.c | 2 +- .../testing/selftests/kvm/x86_64/state_test.c | 2 +- .../kvm/x86_64/vmx_preemption_timer_test.c | 2 +- 10 files changed, 108 insertions(+), 14 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9a702e05fd7d..57971903e01f 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -358,9 +358,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[0]; }; #define KVM_MAX_XCRS 16 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index ca41220b40b8..7986e3b6d706 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_XSAVE2 207 #ifdef KVM_CAP_IRQ_ROUTING @@ -1462,6 +1463,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XSAVE */ #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 85b621383fa1..40021f85033e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -256,6 +256,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * guest_code - The vCPU's entry point */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +void vm_xsave_req_perm(void); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8e61340b3911..bc2b12441836 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -10,8 +10,10 @@ #include #include +#include #include +#include #define X86_EFLAGS_FIXED (1u << 1) @@ -319,6 +321,7 @@ struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); +void kvm_x86_state_cleanup(struct kvm_x86_state *state); struct kvm_msr_list *kvm_get_msr_index_list(void); @@ -419,4 +422,11 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, /* VMX_EPT_VPID_CAP bits */ #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 + +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ + XSTATE_XTILE_DATA_MASK) #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d9de68b44783..9667654b3d77 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -66,15 +66,15 @@ int kvm_check_cap(long cap) /* VM Check Capability * * Input Args: - * vm - Virtual Machine - * cap - Capability + * vm - Virtual Machine + * cap - Capability * * Output Args: None * * Return: - * On success, the Value corresponding to the capability (KVM_CAP_*) - * specified by the value of cap. On failure a TEST_ASSERT failure - * is produced. + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. * * Looks up and returns the value corresponding to the capability * (KVM_CAP_*) given by cap. @@ -85,7 +85,7 @@ int vm_check_cap(struct kvm_vm *vm, long cap) ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" - " rc: %i errno: %i", ret, errno); + " rc: %i errno: %i", ret, errno); return ret; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f5d2d27bee05..cfcf49360999 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -580,6 +580,45 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m vcpu_sregs_set(vm, vcpuid, &sregs); } +#define CPUID_XFD_BIT (1 << 4) +static bool is_xfd_supported(void) +{ + int eax, ebx, ecx, edx; + const int leaf = 0xd, subleaf = 0x1; + + __asm__ __volatile__( + "cpuid" + : /* output */ "=a"(eax), "=b"(ebx), + "=c"(ecx), "=d"(edx) + : /* input */ "0"(leaf), "2"(subleaf)); + + return !!(eax & CPUID_XFD_BIT); +} + +void vm_xsave_req_perm(void) +{ + unsigned long bitmask; + long rc; + + if (!is_xfd_supported()) + return; + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, + XSTATE_XTILE_DATA_BIT); + /* + * The older kernel version(<5.15) can't support + * ARCH_REQ_XCOMP_GUEST_PERM and directly return. + */ + if (rc) + return; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { struct kvm_mp_state mp_state; @@ -588,6 +627,11 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + /* + * Permission needs to be requested before KVM_SET_CPUID2. + */ + vm_xsave_req_perm(); + /* Create VCPU */ vm_vcpu_add(vm, vcpuid); vcpu_setup(vm, vcpuid, 0, 0); @@ -904,10 +948,10 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) } struct kvm_x86_state { + struct kvm_xsave *xsave; struct kvm_vcpu_events events; struct kvm_mp_state mp_state; struct kvm_regs regs; - struct kvm_xsave xsave; struct kvm_xcrs xcrs; struct kvm_sregs sregs; struct kvm_debugregs debugregs; @@ -957,6 +1001,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void) return list; } +static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu, + struct kvm_x86_state *state) +{ + int size; + + size = vm_check_cap(vm, KVM_CAP_XSAVE2); + if (!size) + size = sizeof(struct kvm_xsave); + + state->xsave = malloc(size); + if (size == sizeof(struct kvm_xsave)) + return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave); + else + return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave); +} + struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1000,7 +1060,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", r); - r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + r = vcpu_save_xsave_state(vm, vcpu, state); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", r); @@ -1045,7 +1105,7 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); + r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); @@ -1086,6 +1146,12 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s } } +void kvm_x86_state_cleanup(struct kvm_x86_state *state) +{ + free(state->xsave); + free(state); +} + bool is_intel_cpu(void) { int eax, ebx, ecx, edx; diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 757928199f19..1d4c48b800ec 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -148,7 +148,7 @@ int main(int argc, char *argv[]) vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index ae39a220609f..c3aa72819d28 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -156,7 +156,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); } done: diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index f6c8b9042f8a..78875403aae6 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -219,7 +219,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index a7737af1224f..6921df933098 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); -- Gitee From f78bddda5cd41302d61e5944e77782dca6117b46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Jan 2022 04:35:31 -0800 Subject: [PATCH 20/37] x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state() mainline inclusion from mainline-v5.17-rc1 commit 5429cead01192ff4019ea0b13316268d14fd1ec2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 5429cead0119 x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state(). -------------------------------- KVM can disable the write emulation for the XFD MSR when the vCPU's fpstate is already correctly sized to reduce the overhead. When write emulation is disabled the XFD MSR state after a VMEXIT is unknown and therefore not in sync with the software states in fpstate and the per CPU XFD cache. Provide fpu_sync_guest_vmexit_xfd_state() which has to be invoked after a VMEXIT before enabling interrupts when write emulation is disabled for the XFD MSR. It could be invoked unconditionally even when write emulation is enabled for the price of a pointless MSR read. Signed-off-by: Thomas Gleixner Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-21-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 2 ++ arch/x86/kernel/fpu/core.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index f7c947b09acb..c8a6034019c8 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -123,8 +123,10 @@ extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatu #ifdef CONFIG_X86_64 extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +extern void fpu_sync_guest_vmexit_xfd_state(void); #else static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +static inline void fpu_sync_guest_vmexit_xfd_state(void) { } #endif extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0c2b60a703cc..632ceb07e90f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -278,6 +278,30 @@ void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); + +/** + * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state + * + * Must be invoked from KVM after a VMEXIT before enabling interrupts when + * XFD write emulation is disabled. This is required because the guest can + * freely modify XFD and the state at VMEXIT is not guaranteed to be the + * same as the state on VMENTER. So software state has to be udpated before + * any operation which depends on it can take place. + * + * Note: It can be invoked unconditionally even when write emulation is + * enabled for the price of a then pointless MSR read. + */ +void fpu_sync_guest_vmexit_xfd_state(void) +{ + struct fpstate *fps = current->thread.fpu.fpstate; + + lockdep_assert_irqs_disabled(); + if (fpu_state_size_dynamic()) { + rdmsrl(MSR_IA32_XFD, fps->xfd); + __this_cpu_write(xfd_state, fps->xfd); + } +} +EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); #endif /* CONFIG_X86_64 */ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) -- Gitee From 7b32cbb5111db72a827f64bb56407d9abf68b9dc Mon Sep 17 00:00:00 2001 From: Kevin Tian Date: Wed, 5 Jan 2022 04:35:32 -0800 Subject: [PATCH 21/37] kvm: x86: Disable interception for IA32_XFD on demand mainline inclusion from mainline-v5.17-rc1 commit b5274b1b7ba89fe8ed38cc470041cd6ba0dfb79b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit b5274b1b7ba8 kvm: x86: Disable interception for IA32_XFD on demand. -------------------------------- Always intercepting IA32_XFD causes non-negligible overhead when this register is updated frequently in the guest. Disable r/w emulation after intercepting the first WRMSR(IA32_XFD) with a non-zero value. Disable WRMSR emulation implies that IA32_XFD becomes out-of-sync with the software states in fpstate and the per-cpu xfd cache. This leads to two additional changes accordingly: - Call fpu_sync_guest_vmexit_xfd_state() after vm-exit to bring software states back in-sync with the MSR, before handle_exit_irqoff() is called. - Always trap #NM once write interception is disabled for IA32_XFD. The #NM exception is rare if the guest doesn't use dynamic features. Otherwise, there is at most one exception per guest task given a dynamic feature. p.s. We have confirmed that SDM is being revised to say that when setting IA32_XFD[18] the AMX register state is not guaranteed to be preserved. This clarification avoids adding mess for a creative guest which sets IA32_XFD[18]=1 before saving active AMX state to its own storage. Signed-off-by: Kevin Tian Signed-off-by: Jing Liu Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-22-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 24 +++++++++++++++++++----- arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 8 ++++++++ 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17b80ad95385..38e2feb6dca4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -575,6 +575,7 @@ struct kvm_vcpu_arch { bool at_instruction_boundary; bool tpr_access_reporting; bool xsaves_enabled; + bool xfd_no_write_intercept; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c47b635804fe..d89ec9012267 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -166,6 +166,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = { MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, #endif MSR_IA32_SYSENTER_CS, @@ -925,10 +926,11 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) } /* - * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR - * can be saved timely. + * Disabling xfd interception indicates that dynamic xfeatures + * might be used in the guest. Always trap #NM in this case + * to save guest xfd_err timely. */ - if (vcpu->arch.guest_fpu.fpstate->xfd) + if (vcpu->arch.xfd_no_write_intercept) eb |= (1u << NM_VECTOR); vmcs_write32(EXCEPTION_BITMAP, eb); @@ -2141,9 +2143,21 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_XFD: ret = kvm_set_msr_common(vcpu, msr_info); - /* Update #NM interception according to guest xfd */ - if (!ret) + /* + * Always intercepting WRMSR could incur non-negligible + * overhead given xfd might be changed frequently in + * guest context switch. Disable write interception + * upon the first write with a non-zero value (indicating + * potential usage on dynamic xfeatures). Also update + * exception bitmap to trap #NM for proper virtualization + * of guest xfd_err. + */ + if (!ret && data) { + vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, + MSR_TYPE_RW); + vcpu->arch.xfd_no_write_intercept = true; update_exception_bitmap(vcpu); + } break; #endif case MSR_IA32_SYSENTER_CS: diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index cf23008b5029..fd16c4d86909 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -336,7 +336,7 @@ struct vcpu_vmx { struct lbr_desc lbr_desc; /* Save desired MSR intercept (read: pass-through) state */ -#define MAX_POSSIBLE_PASSTHROUGH_MSRS 14 +#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15 struct { DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 82ab557915c9..f51ffcf900b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9299,6 +9299,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + /* + * Sync xfd before calling handle_exit_irqoff() which may + * rely on the fact that guest_fpu::xfd is up-to-date (e.g. + * in #NM irqoff handler). + */ + if (vcpu->arch.xfd_no_write_intercept) + fpu_sync_guest_vmexit_xfd_state(); + kvm_x86_ops.handle_exit_irqoff(vcpu); if (vcpu->arch.guest_fpu.xfd_err) -- Gitee From 0df0ed6d5cf0871d90962a7367f4ca4ef83ec88a Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Thu, 13 Jan 2022 13:08:25 -0500 Subject: [PATCH 22/37] x86/fpu: Fix inline prefix warnings mainline inclusion from mainline-v5.17-rc1 commit c862dcd199759d4a45e65dab47b03e3e8a144e3a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit c862dcd19975 x86/fpu: Fix inline prefix warnings. -------------------------------- Fix sparse warnings in xstate and remove inline prefix. Fixes: 980fe2fddcff ("x86/fpu: Extend fpu_xstate_prctl() with guest permissions") Signed-off-by: Yang Zhong Reported-by: kernel test robot Message-Id: <20220113180825.322333-1-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/api.h | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index c8a6034019c8..27f3875be8b2 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -113,7 +113,7 @@ static inline void fpstate_free(struct fpu *fpu) { } /* fpstate-related functions which are exported to KVM */ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); -extern inline u64 xstate_get_guest_group_perm(void); +extern u64 xstate_get_guest_group_perm(void); /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 52c765860e09..3515df0ec5e2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1744,7 +1744,7 @@ static inline int xstate_request_perm(unsigned long idx, bool guest) } #endif /* !CONFIG_X86_64 */ -inline u64 xstate_get_guest_group_perm(void) +u64 xstate_get_guest_group_perm(void) { return xstate_get_group_perm(true); } -- Gitee From fda30b1cb02e65fc1591fc2a63b1a33782940dc3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Dec 2021 09:53:20 -0500 Subject: [PATCH 23/37] selftest: kvm: Reorder vcpu_load_state steps for AMX mainline inclusion from mainline-v5.17-rc1 commit 551447cfa5dc208b7fba7aa98391d5cc8149fa5a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 551447cfa5dc selftest: kvm: Reorder vcpu_load_state steps for AMX. -------------------------------- For AMX support it is recommended to load XCR0 after XFD, so that KVM does not see XFD=0, XCR=1 for a save state that will eventually be disabled (which would lead to premature allocation of the space required for that save state). It is also required to load XSAVE data after XCR0 and XFD, so that KVM can trigger allocation of the extra space required to store AMX state. Adjust vcpu_load_state to obey these new requirements. Signed-off-by: Paolo Bonzini Signed-off-by: Yang Zhong Message-Id: <20211223145322.2914028-2-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- .../selftests/kvm/lib/x86_64/processor.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index cfcf49360999..5a78c0340887 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1105,24 +1105,25 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", r); + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, + "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", r); } - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); - r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", r); -- Gitee From 9c5136f679e8b0982638a5dd08bd0de73a316b23 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Thu, 23 Dec 2021 09:53:21 -0500 Subject: [PATCH 24/37] selftest: kvm: Move struct kvm_x86_state to header mainline inclusion from mainline-v5.17-rc1 commit 6559b4a523cd65f6005b4592833b16ba970abdf5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 6559b4a523cd selftest: kvm: Move struct kvm_x86_state to header. -------------------------------- Those changes can avoid dereferencing pointer compile issue when amx_test.c reference state->xsave. Move struct kvm_x86_state definition to processor.h. Signed-off-by: Yang Zhong Message-Id: <20211223145322.2914028-3-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- .../selftests/kvm/include/x86_64/processor.h | 16 +++++++++++++++- .../testing/selftests/kvm/lib/x86_64/processor.c | 15 --------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index bc2b12441836..ebd86c326e02 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -74,6 +74,21 @@ struct desc_ptr { uint64_t address; } __attribute__((packed)); +struct kvm_x86_state { + struct kvm_xsave *xsave; + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + static inline uint64_t get_desc64_base(const struct desc64 *desc) { return ((uint64_t)desc->base3 << 32) | @@ -317,7 +332,6 @@ static inline unsigned long get_xmm(int n) bool is_intel_cpu(void); -struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 5a78c0340887..82e9e5b1369c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -947,21 +947,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) sregs_dump(stream, &sregs, indent + 4); } -struct kvm_x86_state { - struct kvm_xsave *xsave; - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - static int kvm_get_num_msrs_fd(int kvm_fd) { struct kvm_msr_list nmsrs; -- Gitee From 990f03721eda314fea4a661c6c864b09f9639caa Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Thu, 23 Dec 2021 09:53:22 -0500 Subject: [PATCH 25/37] selftest: kvm: Add amx selftest mainline inclusion from mainline-v5.17-rc1 commit bf70636d9443c9e0718fd98765ba634e631ed079 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit bf70636d9443 selftest: kvm: Add amx selftest. -------------------------------- This selftest covers two aspects of AMX. The first is triggering #NM exception and checking the MSR XFD_ERR value. The second case is loading tile config and tile data into guest registers and trapping to the host side for a complete save/load of the guest state. TMM0 is also checked against memory data after save/restore. Signed-off-by: Yang Zhong Message-Id: <20211223145322.2914028-4-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/x86_64/amx_test.c | 461 ++++++++++++++++++ 2 files changed, 462 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/amx_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2756bf2d84b9..5df8da429a83 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -61,6 +61,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test +TEST_GEN_PROGS_x86_64 += x86_64/amx_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c new file mode 100644 index 000000000000..90021e429b28 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * amx tests + * + * Copyright (C) 2021, Intel, Inc. + * + * Tests for amx #NM exception and save/restore. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +#define VCPU_ID 0 +#define X86_FEATURE_XSAVE (1 << 26) +#define X86_FEATURE_OSXSAVE (1 << 27) + +#define PAGE_SIZE (1 << 12) +#define NUM_TILES 8 +#define TILE_SIZE 1024 +#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) + +/* Tile configuration associated: */ +#define MAX_TILES 16 +#define RESERVED_BYTES 14 + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +#define TILE_CPUID 0x1d +#define XSTATE_CPUID 0xd +#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 +#define XSTATE_USER_STATE_SUBLEAVE 0x0 + +#define XSAVE_HDR_OFFSET 512 + +struct xsave_data { + u8 area[XSAVE_SIZE]; +} __aligned(64); + +struct tile_config { + u8 palette_id; + u8 start_row; + u8 reserved[RESERVED_BYTES]; + u16 colsb[MAX_TILES]; + u8 rows[MAX_TILES]; +}; + +struct tile_data { + u8 data[NUM_TILES * TILE_SIZE]; +}; + +struct xtile_info { + u16 bytes_per_tile; + u16 bytes_per_row; + u16 max_names; + u16 max_rows; + u32 xsave_offset; + u32 xsave_size; +}; + +static struct xtile_info xtile; + +static inline u64 __xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void __xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +static inline void __ldtilecfg(void *cfg) +{ + asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00" + : : "a"(cfg)); +} + +static inline void __tileloadd(void *tile) +{ + asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10" + : : "a"(tile), "d"(0)); +} + +static inline void __tilerelease(void) +{ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); +} + +static inline void __xsavec(struct xsave_data *data, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsavec (%%rdi)" + : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +static inline void check_cpuid_xsave(void) +{ + uint32_t eax, ebx, ecx, edx; + + eax = 1; + ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(ecx & X86_FEATURE_XSAVE)) + GUEST_ASSERT(!"cpuid: no CPU xsave support!"); + if (!(ecx & X86_FEATURE_OSXSAVE)) + GUEST_ASSERT(!"cpuid: no OS xsave support!"); +} + +static bool check_xsave_supports_xtile(void) +{ + return __xgetbv(0) & XFEATURE_MASK_XTILE; +} + +static bool enum_xtile_config(void) +{ + u32 eax, ebx, ecx, edx; + + eax = TILE_CPUID; + ecx = TILE_PALETTE_CPUID_SUBLEAVE; + + cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx || !ecx) + return false; + + xtile.max_names = ebx >> 16; + if (xtile.max_names < NUM_TILES) + return false; + + xtile.bytes_per_tile = eax >> 16; + if (xtile.bytes_per_tile < TILE_SIZE) + return false; + + xtile.bytes_per_row = ebx; + xtile.max_rows = ecx; + + return true; +} + +static bool enum_xsave_tile(void) +{ + u32 eax, ebx, ecx, edx; + + eax = XSTATE_CPUID; + ecx = XFEATURE_XTILEDATA; + + cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx) + return false; + + xtile.xsave_offset = ebx; + xtile.xsave_size = eax; + + return true; +} + +static bool check_xsave_size(void) +{ + u32 eax, ebx, ecx, edx; + bool valid = false; + + eax = XSTATE_CPUID; + ecx = XSTATE_USER_STATE_SUBLEAVE; + + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx && ebx <= XSAVE_SIZE) + valid = true; + + return valid; +} + +static bool check_xtile_info(void) +{ + bool ret = false; + + if (!check_xsave_size()) + return ret; + + if (!enum_xsave_tile()) + return ret; + + if (!enum_xtile_config()) + return ret; + + if (sizeof(struct tile_data) >= xtile.xsave_size) + ret = true; + + return ret; +} + +static void set_tilecfg(struct tile_config *cfg) +{ + int i; + + /* Only palette id 1 */ + cfg->palette_id = 1; + for (i = 0; i < xtile.max_names; i++) { + cfg->colsb[i] = xtile.bytes_per_row; + cfg->rows[i] = xtile.max_rows; + } +} + +static void set_xstatebv(void *data, uint64_t bv) +{ + *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv; +} + +static u64 get_xstatebv(void *data) +{ + return *(u64 *)(data + XSAVE_HDR_OFFSET); +} + +static void init_regs(void) +{ + uint64_t cr4, xcr0; + + /* turn on CR4.OSXSAVE */ + cr4 = get_cr4(); + cr4 |= X86_CR4_OSXSAVE; + set_cr4(cr4); + + xcr0 = __xgetbv(0); + xcr0 |= XFEATURE_MASK_XTILE; + __xsetbv(0x0, xcr0); +} + +static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, + struct tile_data *tiledata, + struct xsave_data *xsave_data) +{ + init_regs(); + check_cpuid_xsave(); + GUEST_ASSERT(check_xsave_supports_xtile()); + GUEST_ASSERT(check_xtile_info()); + + /* check xtile configs */ + GUEST_ASSERT(xtile.xsave_offset == 2816); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(xtile.max_names == 8); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + GUEST_ASSERT(xtile.bytes_per_row == 64); + GUEST_ASSERT(xtile.max_rows == 16); + GUEST_SYNC(1); + + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(2); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + GUEST_SYNC(3); + /* Check save/restore when trap to userspace */ + __tileloadd(tiledata); + GUEST_SYNC(4); + __tilerelease(); + GUEST_SYNC(5); + /* bit 18 not in the XCOMP_BV after xsavec() */ + set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA); + __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA); + GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0); + + /* xfd=0x40000, disable amx tiledata */ + wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA); + GUEST_SYNC(6); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + /* Trigger #NM exception */ + __tileloadd(tiledata); + GUEST_SYNC(10); + + GUEST_DONE(); +} + +void guest_nm_handler(struct ex_regs *regs) +{ + /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */ + GUEST_SYNC(7); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA); + GUEST_SYNC(8); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA); + /* Clear xfd_err */ + wrmsr(MSR_IA32_XFD_ERR, 0); + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(9); +} + +int main(int argc, char *argv[]) +{ + struct kvm_cpuid_entry2 *entry; + struct kvm_regs regs1, regs2; + bool amx_supported = false; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int xsave_restore_size = 0; + vm_vaddr_t amx_cfg, tiledata, xsavedata; + struct ucall uc; + u32 amx_offset; + int stage, ret; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + entry = kvm_get_supported_cpuid_entry(1); + if (!(entry->ecx & X86_FEATURE_XSAVE)) { + print_skip("XSAVE feature not supported"); + exit(KSFT_SKIP); + } + + if (kvm_get_cpuid_max_basic() >= 0xd) { + entry = kvm_get_supported_cpuid_index(0xd, 0); + amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE); + if (!amx_supported) { + print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax); + exit(KSFT_SKIP); + } + /* Get xsave/restore max size */ + xsave_restore_size = entry->ecx; + } + + run = vcpu_state(vm, VCPU_ID); + vcpu_regs_get(vm, VCPU_ID, ®s1); + + /* Register #NM handler */ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, NM_VECTOR, guest_nm_handler); + + /* amx cfg for guest_code */ + amx_cfg = vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, 0, 0); + memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); + + /* amx tiledata for guest_code */ + tiledata = vm_vaddr_alloc(vm, 2 * getpagesize(), KVM_UTIL_MIN_VADDR, 0, 0); + memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); + + /* xsave data for guest_code */ + xsavedata = vm_vaddr_alloc(vm, 3 * getpagesize(), KVM_UTIL_MIN_VADDR, 0, 0); + memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize()); + vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata); + + for (stage = 1; ; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + switch (uc.args[1]) { + case 1: + case 2: + case 3: + case 5: + case 6: + case 7: + case 8: + fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]); + break; + case 4: + case 10: + fprintf(stderr, + "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]); + + /* Compacted mode, get amx offset by xsave area + * size subtract 8K amx size. + */ + amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; + state = vcpu_save_state(vm, VCPU_ID); + void *amx_start = (void *)state->xsave + amx_offset; + void *tiles_data = (void *)addr_gva2hva(vm, tiledata); + /* Only check TMM0 register, 1 tile */ + ret = memcmp(amx_start, tiles_data, TILE_SIZE); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + kvm_x86_state_cleanup(state); + break; + case 9: + fprintf(stderr, + "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]); + break; + } + break; + case UCALL_DONE: + fprintf(stderr, "UCALL_DONE\n"); + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + state = vcpu_save_state(vm, VCPU_ID); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } +done: + kvm_vm_free(vm); +} -- Gitee From f57bbcb39e0fab9ec43f9a8472338e3684ed91da Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 17 Jan 2022 15:45:31 +0800 Subject: [PATCH 26/37] KVM: x86/cpuid: Clear XFD for component i if the base feature is missing mainline inclusion from mainline-v5.17-rc1 commit e9737468829c2f6abc0c67e5372f8878dff11653 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit e9737468829c KVM: x86/cpuid: Clear XFD for component i if the base feature is missing. -------------------------------- According to Intel extended feature disable (XFD) spec, the sub-function i (i > 1) of CPUID function 0DH enumerates "details for state component i. ECX[2] enumerates support for XFD support for this state component." If KVM does not report F(XFD) feature (e.g. due to CONFIG_X86_64), then the corresponding XFD support for any state component i should also be removed. Translate this dependency into KVM terms. Fixes: 690a757d610e ("kvm: x86: Add CPUID support for Intel AMX") Signed-off-by: Like Xu Message-Id: <20220117074531.76925-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/cpuid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e72a6f78faf6..e2fbd1fa75ed 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -836,6 +836,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) --array->nent; continue; } + + if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) + entry->ecx &= ~BIT_ULL(2); entry->edx = 0; } break; -- Gitee From 7390fb33b5474d7fe9003f628aa54b90af235f19 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:44:34 -0500 Subject: [PATCH 27/37] selftests: kvm: move vm_xsave_req_perm call to amx_test mainline inclusion from mainline-v5.17-rc1 commit dd4516aee365fc9c944c9d6036b6b87363398680 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit dd4516aee365 selftests: kvm: move vm_xsave_req_perm call to amx_test. -------------------------------- There is no need for tests other than amx_test to enable dynamic xsave states. Remove the call to vm_xsave_req_perm from generic code, and move it inside the test. While at it, allow customizing the bit that is requested, so that future tests can use it differently. Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- tools/testing/selftests/kvm/include/kvm_util.h | 1 - .../selftests/kvm/include/x86_64/processor.h | 2 ++ .../selftests/kvm/lib/x86_64/processor.c | 17 ++++++----------- tools/testing/selftests/kvm/x86_64/amx_test.c | 2 ++ 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 40021f85033e..85b621383fa1 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -256,7 +256,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * guest_code - The vCPU's entry point */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -void vm_xsave_req_perm(void); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index ebd86c326e02..55c69d982c3b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -391,6 +391,8 @@ bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); +void vm_xsave_req_perm(int bit); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 82e9e5b1369c..107a2dfc663d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -595,16 +595,16 @@ static bool is_xfd_supported(void) return !!(eax & CPUID_XFD_BIT); } -void vm_xsave_req_perm(void) +void vm_xsave_req_perm(int bit) { - unsigned long bitmask; + u64 bitmask; long rc; if (!is_xfd_supported()) - return; + exit(KSFT_SKIP); + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, - XSTATE_XTILE_DATA_BIT); /* * The older kernel version(<5.15) can't support * ARCH_REQ_XCOMP_GUEST_PERM and directly return. @@ -614,7 +614,7 @@ void vm_xsave_req_perm(void) rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + TEST_ASSERT(bitmask & (1ULL << bit), "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", bitmask); } @@ -627,11 +627,6 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); - /* - * Permission needs to be requested before KVM_SET_CPUID2. - */ - vm_xsave_req_perm(); - /* Create VCPU */ vm_vcpu_add(vm, vcpuid); vcpu_setup(vm, vcpuid, 0, 0); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 90021e429b28..3a23edfdb70d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -342,6 +342,8 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); -- Gitee From 59cd260534fd156cc4d89b5e483e6562ea2ca6d3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:28 -0700 Subject: [PATCH 28/37] tools: Move x86 syscall number fallbacks to .../uapi/ mainline inclusion from mainline-v5.15-rc3 commit de5f4213dafa8f8b0b52cdaf06bb35ad4cab1681 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit de5f4213dafa tools: Move x86 syscall number fallbacks to .../uapi/. -------------------------------- Move unistd_{32,64}.h from x86/include/asm to x86/include/uapi/asm so that tools/selftests that install kernel headers, e.g. KVM selftests, can include non-uapi tools headers, e.g. to get 'struct list_head', without effectively overriding the installed non-tool uapi headers. Swapping KVM's search order, e.g. to search the kernel headers before tool headers, is not a viable option as doing results in linux/type.h and other core headers getting pulled from the kernel headers, which do not have the kernel-internal typedefs that are used through tools, including many files outside of selftests/kvm's control. Prior to commit cec07f53c398 ("perf tools: Move syscall number fallbacks from perf-sys.h to tools/arch/x86/include/asm/"), the handcoded numbers were actual fallbacks, i.e. overriding unistd_{32,64}.h from the kernel headers was unintentional. Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-4-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- tools/arch/x86/include/{ => uapi}/asm/unistd_32.h | 0 tools/arch/x86/include/{ => uapi}/asm/unistd_64.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tools/arch/x86/include/{ => uapi}/asm/unistd_32.h (100%) rename tools/arch/x86/include/{ => uapi}/asm/unistd_64.h (100%) diff --git a/tools/arch/x86/include/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h similarity index 100% rename from tools/arch/x86/include/asm/unistd_32.h rename to tools/arch/x86/include/uapi/asm/unistd_32.h diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h similarity index 100% rename from tools/arch/x86/include/asm/unistd_64.h rename to tools/arch/x86/include/uapi/asm/unistd_64.h -- Gitee From ea19708989496435fd5c14c6b11b11fef97ec3cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 7 Aug 2020 08:45:47 -0300 Subject: [PATCH 29/37] tools arch x86: Sync the msr-index.h copy with the kernel sources mainline inclusion from mainline-v5.16-rc1 commit 3442b5e05a7b90dcf3fd74c4d5a4a9796358eef2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 3442b5e05a7b tools arch x86: Sync the msr-index.h copy with the kernel sources. -------------------------------- To pick up the changes in: dae1bd58389615d4 ("x86/msr-index: Add MSRs for XFD") Addressing these tools/perf build warnings: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' That makes the beautification scripts to pick some new entries: $ diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h --- tools/arch/x86/include/asm/msr-index.h 2021-07-15 16:17:01.819817827 -0300 +++ arch/x86/include/asm/msr-index.h 2021-11-06 15:49:33.738517311 -0300 @@ -625,6 +625,8 @@ #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_APICBASE 0x0000001b $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > /tmp/before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > /tmp/after $ diff -u /tmp/before /tmp/after --- /tmp/before 2021-11-13 11:10:39.964201505 -0300 +++ /tmp/after 2021-11-13 11:10:47.902410873 -0300 @@ -93,6 +93,8 @@ [0x000001b0] = "IA32_ENERGY_PERF_BIAS", [0x000001b1] = "IA32_PACKAGE_THERM_STATUS", [0x000001b2] = "IA32_PACKAGE_THERM_INTERRUPT", + [0x000001c4] = "IA32_XFD", + [0x000001c5] = "IA32_XFD_ERR", [0x000001c8] = "LBR_SELECT", [0x000001c9] = "LBR_TOS", [0x000001d9] = "IA32_DEBUGCTLMSR", $ And this gets rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf Now one can trace systemwide asking to see backtraces to where those MSRs are being read/written with: # perf trace -e msr:*_msr/max-stack=32/ --filter="msr==IA32_XFD || msr==IA32_XFD_ERR" ^C# # If we use -v (verbose mode) we can see what it does behind the scenes: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_XFD || msr==IA32_XFD_ERR" New filter for msr:read_msr: (msr==0x1c4 || msr==0x1c5) && (common_pid != 4448951 && common_pid != 8781) New filter for msr:write_msr: (msr==0x1c4 || msr==0x1c5) && (common_pid != 4448951 && common_pid != 8781) ^C# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 3738351 && common_pid != 3564) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 3738351 && common_pid != 3564) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 pipewire/2479 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) schedule_hrtimeout_range_clock ([kernel.kallsyms]) do_epoll_wait ([kernel.kallsyms]) __x64_sys_epoll_wait ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) epoll_wait (/usr/lib64/libc-2.33.so) [0x76c4] (/usr/lib64/spa-0.2/support/libspa-support.so) [0x4cf0] (/usr/lib64/spa-0.2/support/libspa-support.so) 0.027 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) start_kernel ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Cc: Borislav Petkov Cc: Chang S. Bae Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YY%2FJdb6on7swsn+C@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Lin Wang --- tools/arch/x86/include/asm/msr-index.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index b8954262d767..f0195728e1c1 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -659,6 +659,8 @@ #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_APICBASE 0x0000001b -- Gitee From 39058f91fddb477787ae156912c550660ed0e53a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Nov 2021 10:43:52 -0300 Subject: [PATCH 30/37] tools headers UAPI: Sync arch prctl headers with the kernel sources mainline inclusion from mainline-v5.16-rc1 commit 5b749efe2df87628aa79749801cf8084d56badd0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 5b749efe2df8 tools headers UAPI: Sync arch prctl headers with the kernel sources. -------------------------------- To pick the changes in this cset: db8268df0983adc2 ("x86/arch_prctl: Add controls for dynamic XSTATE components") This picks these new prctls: $ tools/perf/trace/beauty/x86_arch_prctl.sh > /tmp/before $ cp arch/x86/include/uapi/asm/prctl.h tools/arch/x86/include/uapi/asm/prctl.h $ tools/perf/trace/beauty/x86_arch_prctl.sh > /tmp/after $ diff -u /tmp/before /tmp/after --- /tmp/before 2021-11-13 10:42:52.787308809 -0300 +++ /tmp/after 2021-11-13 10:43:02.295558837 -0300 @@ -6,6 +6,9 @@ [0x1004 - 0x1001]= "GET_GS", [0x1011 - 0x1001]= "GET_CPUID", [0x1012 - 0x1001]= "SET_CPUID", + [0x1021 - 0x1001]= "GET_XCOMP_SUPP", + [0x1022 - 0x1001]= "GET_XCOMP_PERM", + [0x1023 - 0x1001]= "REQ_XCOMP_PERM", }; #define x86_arch_prctl_codes_2_offset 0x2001 $ With this 'perf trace' can translate those numbers into strings and use the strings in filter expressions: # perf trace -e prctl 0.000 ( 0.011 ms): DOM Worker/3722622 prctl(option: SET_NAME, arg2: 0x7f9c014b7df5) = 0 0.032 ( 0.002 ms): DOM Worker/3722622 prctl(option: SET_NAME, arg2: 0x7f9bb6b51580) = 0 5.452 ( 0.003 ms): StreamT~ns #30/3722623 prctl(option: SET_NAME, arg2: 0x7f9bdbdfeb70) = 0 5.468 ( 0.002 ms): StreamT~ns #30/3722623 prctl(option: SET_NAME, arg2: 0x7f9bdbdfea70) = 0 24.494 ( 0.009 ms): IndexedDB #556/3722624 prctl(option: SET_NAME, arg2: 0x7f562a32ae28) = 0 24.540 ( 0.002 ms): IndexedDB #556/3722624 prctl(option: SET_NAME, arg2: 0x7f563c6d4b30) = 0 670.281 ( 0.008 ms): systemd-userwo/3722339 prctl(option: SET_NAME, arg2: 0x564be30805c8) = 0 670.293 ( 0.002 ms): systemd-userwo/3722339 prctl(option: SET_NAME, arg2: 0x564be30800f0) = 0 ^C# This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/prctl.h' differs from latest version at 'arch/x86/include/uapi/asm/prctl.h' diff -u tools/arch/x86/include/uapi/asm/prctl.h arch/x86/include/uapi/asm/prctl.h Cc: Borislav Petkov Cc: Chang S. Bae Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YY%2FER104k852WOTK@kernel.org/T/#u Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Lin Wang --- tools/arch/x86/include/uapi/asm/prctl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 5a6aac9fa41f..754a07856817 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -10,6 +10,10 @@ #define ARCH_GET_CPUID 0x1011 #define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 + #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 -- Gitee From e711569610162e2c88d82c5002e12ad7357719db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Nov 2021 10:43:52 -0300 Subject: [PATCH 31/37] tools headers UAPI: Sync x86 arch prctl headers with the kernel sources mainline inclusion from mainline-v5.17-rc1 commit 8326c79d10be2ddbfd3d3804206949a71cb15675 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 8326c79d10be tools headers UAPI: Sync x86 arch prctl headers with the kernel sources. -------------------------------- To pick the changes in this cset: 980fe2fddcff2193 ("x86/fpu: Extend fpu_xstate_prctl() with guest permissions") This picks these new prctls: $ tools/perf/trace/beauty/x86_arch_prctl.sh > /tmp/before $ cp arch/x86/include/uapi/asm/prctl.h tools/arch/x86/include/uapi/asm/prctl.h $ tools/perf/trace/beauty/x86_arch_prctl.sh > /tmp/after $ diff -u /tmp/before /tmp/after --- /tmp/before 2022-01-19 14:40:05.049394977 -0300 +++ /tmp/after 2022-01-19 14:40:35.628154565 -0300 @@ -9,6 +9,8 @@ [0x1021 - 0x1001]= "GET_XCOMP_SUPP", [0x1022 - 0x1001]= "GET_XCOMP_PERM", [0x1023 - 0x1001]= "REQ_XCOMP_PERM", + [0x1024 - 0x1001]= "GET_XCOMP_GUEST_PERM", + [0x1025 - 0x1001]= "REQ_XCOMP_GUEST_PERM", }; #define x86_arch_prctl_codes_2_offset 0x2001 $ With this 'perf trace' can translate those numbers into strings and use the strings in filter expressions: # perf trace -e prctl 0.000 ( 0.011 ms): DOM Worker/3722622 prctl(option: SET_NAME, arg2: 0x7f9c014b7df5) = 0 0.032 ( 0.002 ms): DOM Worker/3722622 prctl(option: SET_NAME, arg2: 0x7f9bb6b51580) = 0 5.452 ( 0.003 ms): StreamT~ns #30/3722623 prctl(option: SET_NAME, arg2: 0x7f9bdbdfeb70) = 0 5.468 ( 0.002 ms): StreamT~ns #30/3722623 prctl(option: SET_NAME, arg2: 0x7f9bdbdfea70) = 0 24.494 ( 0.009 ms): IndexedDB #556/3722624 prctl(option: SET_NAME, arg2: 0x7f562a32ae28) = 0 24.540 ( 0.002 ms): IndexedDB #556/3722624 prctl(option: SET_NAME, arg2: 0x7f563c6d4b30) = 0 670.281 ( 0.008 ms): systemd-userwo/3722339 prctl(option: SET_NAME, arg2: 0x564be30805c8) = 0 670.293 ( 0.002 ms): systemd-userwo/3722339 prctl(option: SET_NAME, arg2: 0x564be30800f0) = 0 ^C# This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/prctl.h' differs from latest version at 'arch/x86/include/uapi/asm/prctl.h' diff -u tools/arch/x86/include/uapi/asm/prctl.h arch/x86/include/uapi/asm/prctl.h Cc: Paolo Bonzini Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Lin Wang --- tools/arch/x86/include/uapi/asm/prctl.h | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 754a07856817..500b96e71f18 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -2,20 +2,22 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_GET_XCOMP_SUPP 0x1021 -#define ARCH_GET_XCOMP_PERM 0x1022 -#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ -- Gitee From d108c31b743e676c84b6f1448bf35700b15b3e9a Mon Sep 17 00:00:00 2001 From: Like Xu Date: Tue, 25 Jan 2022 19:52:23 +0800 Subject: [PATCH 32/37] KVM: x86/cpuid: Exclude unpermitted xfeatures sizes at KVM_GET_SUPPORTED_CPUID mainline inclusion from mainline-v5.17-rc1 commit 1ffce0924a8c86cf0590c039cd5f5c9375d32e9b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 1ffce0924a8c KVM: x86/cpuid: Exclude unpermitted xfeatures sizes at KVM_GET_SUPPORTED_CPUID. -------------------------------- With the help of xstate_get_guest_group_perm(), KVM can exclude unpermitted xfeatures in cpuid.0xd.0.eax, in which case the corresponding xfeatures sizes should also be matched to the permitted xfeatures. To fix this inconsistency, the permitted_xcr0 and permitted_xss are defined consistently, which implies 'supported' plus certain permissions for this task, and it also fixes cpuid.0xd.1.ebx and later leaf-by-leaf queries. Fixes: 445ecdf79be0 ("kvm: x86: Exclude unpermitted xfeatures at KVM_GET_SUPPORTED_CPUID") Signed-off-by: Like Xu Message-Id: <20220125115223.33707-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/cpuid.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e2fbd1fa75ed..b05e63667e48 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -787,13 +787,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xd: { - u64 guest_perm = xstate_get_guest_group_perm(); + u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm(); + u64 permitted_xss = supported_xss; - entry->eax &= supported_xcr0 & guest_perm; - entry->ebx = xstate_required_size(supported_xcr0, false); + entry->eax &= permitted_xcr0; + entry->ebx = xstate_required_size(permitted_xcr0, false); entry->ecx = entry->ebx; - entry->edx &= (supported_xcr0 & guest_perm) >> 32; - if (!supported_xcr0) + entry->edx &= permitted_xcr0 >> 32; + if (!permitted_xcr0) break; entry = do_host_cpuid(array, function, 1); @@ -802,20 +803,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_D_1_EAX); if (entry->eax & (F(XSAVES)|F(XSAVEC))) - entry->ebx = xstate_required_size(supported_xcr0 | supported_xss, + entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { - WARN_ON_ONCE(supported_xss != 0); + WARN_ON_ONCE(permitted_xss != 0); entry->ebx = 0; } - entry->ecx &= supported_xss; - entry->edx &= supported_xss >> 32; + entry->ecx &= permitted_xss; + entry->edx &= permitted_xss >> 32; for (i = 2; i < 64; ++i) { bool s_state; - if (supported_xcr0 & BIT_ULL(i)) + if (permitted_xcr0 & BIT_ULL(i)) s_state = false; - else if (supported_xss & BIT_ULL(i)) + else if (permitted_xss & BIT_ULL(i)) s_state = true; else continue; @@ -829,7 +830,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * invalid sub-leafs. Only valid sub-leafs should * reach this point, and they should have a non-zero * save state size. Furthermore, check whether the - * processor agrees with supported_xcr0/supported_xss + * processor agrees with permitted_xcr0/permitted_xss * on whether this is an XCR0- or IA32_XSS-managed area. */ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) { -- Gitee From c7cc4d56c202e5b03b4ba883a2ce8ee7c7d38b19 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Jan 2022 07:31:53 -0800 Subject: [PATCH 33/37] KVM: x86: Add a helper to retrieve userspace address from kvm_device_attr mainline inclusion from mainline-v5.17-rc2 commit 56f289a8d23addfa4408a08f07f42fcfe2a7bd69 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 56f289a8d23a KVM: x86: Add a helper to retrieve userspace address from kvm_device_attr. -------------------------------- Add a helper to handle converting the u64 userspace address embedded in struct kvm_device_attr into a userspace pointer, it's all too easy to forget the intermediate "unsigned long" cast as well as the truncation check. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f51ffcf900b3..a9e397416028 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4006,7 +4006,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; } return r; +} + +static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user*)(unsigned long)attr->addr; + if ((u64)(unsigned long)uaddr != attr->addr) + return ERR_PTR(-EFAULT); + return uaddr; } long kvm_arch_dev_ioctl(struct file *filp, -- Gitee From 953b514a05ab3f23a2c6eb49f9f9e64c84ba6e5d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:49:45 -0500 Subject: [PATCH 34/37] KVM: x86: add system attribute to retrieve full set of supported xsave states mainline inclusion from mainline-v5.17-rc2 commit dd6e631220181162478984d2d46dd979e04d8e75 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit dd6e63122018 KVM: x86: add system attribute to retrieve full set of supported xsave states. -------------------------------- Because KVM_GET_SUPPORTED_CPUID is meant to be passed (by simple-minded VMMs) to KVM_SET_CPUID2, it cannot include any dynamic xsave states that have not been enabled. Probing those, for example so that they can be passed to ARCH_REQ_XCOMP_GUEST_PERM, requires a new ioctl or arch_prctl. The latter is in fact worse, even though that is what the rest of the API uses, because it would require supported_xcr0 to be moved from the KVM module to the kernel just for this use. In addition, the value would be nonsensical (or an error would have to be returned) until the KVM module is loaded in. Therefore, to limit the growth of system ioctls, add a /dev/kvm variant of KVM_{GET,HAS}_DEVICE_ATTR, and implement it in x86 with just one group (0) and attribute (KVM_X86_XCOMP_GUEST_SUPP). Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- Documentation/virt/kvm/api.rst | 4 ++- arch/x86/include/uapi/asm/kvm.h | 3 ++ arch/x86/kvm/x86.c | 51 +++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9846cc4f5389..76302aafc7e7 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3195,6 +3195,7 @@ number. :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -3229,7 +3230,8 @@ transferred is defined by the particular attribute. ------------------------ :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, - KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 1552d6024e21..e2f67a815c65 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -441,6 +441,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9e397416028..2803f469e6c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3916,6 +3916,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_X86_SGX_KVM case KVM_CAP_SGX_ATTRIBUTE: #endif + case KVM_CAP_SYS_ATTRIBUTES: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -4017,6 +4018,40 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) return uaddr; } +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = kvm_get_attr_addr(attr); + + if (attr->group) + return -ENXIO; + + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + if (put_user(supported_xcr0, uaddr)) + return -EFAULT; + return 0; + default: + return -ENXIO; + break; + } +} + +static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) +{ + if (attr->group) + return -ENXIO; + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + return 0; + default: + return -ENXIO; + } +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4102,6 +4137,22 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_MSRS: r = msr_io(NULL, argp, do_get_msr_feature, 1); break; + case KVM_GET_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_get_attr(&attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_has_attr(&attr); + break; + } default: r = -EINVAL; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0656ef70caf9..aac92756317a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1071,6 +1071,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 #define KVM_CAP_X86_NOTIFY_VMEXIT 219 -- Gitee From 15748f314e5c540ba5266bc98ae6f1910b801115 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jan 2022 07:51:00 -0500 Subject: [PATCH 35/37] selftests: kvm: check dynamic bits against KVM_X86_XCOMP_GUEST_SUPP mainline inclusion from mainline-v5.17-rc2 commit b19c99b9f4486f23e3b7248dd4ce3d83e19b9032 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit b19c99b9f448 selftests: kvm: check dynamic bits against KVM_X86_XCOMP_GUEST_SUPP. -------------------------------- Provide coverage for the new API. Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- tools/arch/x86/include/uapi/asm/kvm.h | 3 +++ tools/include/uapi/linux/kvm.h | 1 + .../selftests/kvm/lib/x86_64/processor.c | 23 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 57971903e01f..d56a9b69e526 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7986e3b6d706..fdd43f1d3d82 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1054,6 +1054,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_XSAVE2 207 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 107a2dfc663d..af0feffa26bc 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -597,8 +597,31 @@ static bool is_xfd_supported(void) void vm_xsave_req_perm(int bit) { + int kvm_fd; u64 bitmask; long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) { + print_skip("%s not available, is KVM loaded? (errno: %d)", + KVM_DEV_PATH, errno); + exit(KSFT_SKIP); + } + + rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + exit(KSFT_SKIP); + + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + + if (!(bitmask & (1ULL << bit))) + exit(KSFT_SKIP); if (!is_xfd_supported()) exit(KSFT_SKIP); -- Gitee From 431c2fb411f094cab726e707fdce45ce6a017936 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 2 Feb 2022 00:51:57 +0000 Subject: [PATCH 36/37] KVM: x86: Use ERR_PTR_USR() to return -EFAULT as a __user pointer mainline inclusion from mainline-v5.17-rc3 commit 6e37ec8825a113bc2dd1b280be10e5ac6eb4f6b1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: commit 6e37ec8825a1 KVM: x86: Use ERR_PTR_USR() to return -EFAULT as a __user pointer. -------------------------------- Use ERR_PTR_USR() when returning -EFAULT from kvm_get_attr_addr(), sparse complains about implicitly casting the kernel pointer from ERR_PTR() into a __user pointer. >> arch/x86/kvm/x86.c:4342:31: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected void [noderef] __user * @@ got void * @@ arch/x86/kvm/x86.c:4342:31: sparse: expected void [noderef] __user * arch/x86/kvm/x86.c:4342:31: sparse: got void * >> arch/x86/kvm/x86.c:4342:31: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected void [noderef] __user * @@ got void * @@ arch/x86/kvm/x86.c:4342:31: sparse: expected void [noderef] __user * arch/x86/kvm/x86.c:4342:31: sparse: got void * No functional change intended. Fixes: 56f289a8d23a ("KVM: x86: Add a helper to retrieve userspace address from kvm_device_attr") Reported-by: kernel test robot Signed-off-by: Sean Christopherson Message-Id: <20220202005157.2545816-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Lin Wang --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2803f469e6c9..bdc7b9c1f82a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -88,6 +88,8 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + #define emul_to_vcpu(ctxt) \ ((struct kvm_vcpu *)(ctxt)->vcpu) @@ -4014,7 +4016,7 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) void __user *uaddr = (void __user*)(unsigned long)attr->addr; if ((u64)(unsigned long)uaddr != attr->addr) - return ERR_PTR(-EFAULT); + return ERR_PTR_USR(-EFAULT); return uaddr; } -- Gitee From 7ea464f567fb4d8cd8f64275d7d3f290ab6332a0 Mon Sep 17 00:00:00 2001 From: Lin Wang Date: Sat, 19 Nov 2022 14:49:16 +0800 Subject: [PATCH 37/37] x86/fpu: Fix KABI broken caused by introducing of guest permissions in struct fpu category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ CVE: NA Intel-SIG: x86/fpu: Fix KABI broken caused by introducing of guest permissions in struct fpu. -------------------------------- Using KABI_EXTEND macro to avoid kabi change caused by the commit: 980fe2fddcff x86/fpu: Extend fpu_xstate_prctl() with guest permissions Signed-off-by: Lin Wang --- arch/x86/include/asm/fpu/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index dde4bd43c7cc..1ab041f14482 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -485,7 +485,7 @@ struct fpu { * * Permission related information for guest pseudo FPUs */ - struct fpu_state_perm guest_perm; + KABI_EXTEND(struct fpu_state_perm guest_perm) /* * @__fpstate: -- Gitee