From 9de8b06e63f61295e8addaa3407c729e11f3427b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:37 +0100 Subject: [PATCH 01/26] irqchip/gic-v3: Refactor ISB + EOIR at ack time mainline inclusion from mainline-v5.19-rc1 commit 6efb50923771f392122f5ce69dfc43b08f16e449 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID47CL CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6efb50923771f392122f5ce69dfc43b08f16e449 ---------------------------------------------------------------------- There are cases where a context synchronization event is necessary between an IRQ being raised and being handled, and there are races such that we cannot rely upon the exception entry being subsequent to the interrupt being raised. To fix this, we place an ISB between a read of IAR and the subsequent invocation of an IRQ handler. When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking its handler, and we have a write to EOIR for this. As this write to EOIR requires an ISB, and this is provided by the gic_write_eoir() helper, we omit the usual ISB in this case, with the logic being: | if (static_branch_likely(&supports_deactivate_key)) | gic_write_eoir(irqnr); | else | isb(); This is somewhat opaque, and it would be a little clearer if there were an unconditional ISB, with only the write to EOIR being conditional, e.g. | if (static_branch_likely(&supports_deactivate_key)) | write_gicreg(irqnr, ICC_EOIR1_EL1); | | isb(); This patch rewrites the code that way, with this logic factored into a new helper function with comments explaining what the ISB is for, as were originally laid out in commit: 39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq") Note that since then, we removed the IAR polling in commit: 342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop") ... which removed one of the two race conditions. For consistency, other portions of the driver are made to manipulate EOIR using write_gicreg() and explicit ISBs, and the gic_write_eoir() helper function is removed. There should be no functional change as a result of this patch.
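For reference, the end result of the refactoring is the single helper added in full by the diff below, in which only the EOIR write is conditional and the ISB is unconditional:

| static inline void gic_complete_ack(u32 irqnr)
| {
| 	if (static_branch_likely(&supports_deactivate_key))
| 		write_gicreg(irqnr, ICC_EOIR1_EL1);
|
| 	isb();
| }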
Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com Signed-off-by: Shi Yang --- arch/arm/include/asm/arch_gicv3.h | 7 +---- drivers/irqchip/irq-gic-v3.c | 43 ++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 21f2ec96cc96..340352c77252 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 39a854ec93ca..75447d091a23 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -543,7 +543,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + isb(); } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -627,10 +628,38 @@ static void gic_deactivate_unhandled(u32 irqnr) if (irqnr < 8192) gic_write_dir(irqnr); } else { - gic_write_eoir(irqnr); + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); } } +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. + * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. + * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Deactivate the interrupt when EOI mode 1 is in use. 
+ */ +static inline void gic_complete_ack(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + + isb(); +} + static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { bool irqs_enabled = interrupts_enabled(regs); @@ -639,10 +668,7 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (irqs_enabled) nmi_enter(); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); /* * Leave the PSR.I bit set to prevent other NMIs to be @@ -713,10 +739,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); if (handle_domain_irq(gic_data.domain, irqnr, regs)) { WARN_ONCE(true, "Unexpected interrupt received!\n"); -- Gitee From 06ed2b27a8a24469f535d27a4d2d7836a43aaf85 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 22:36:56 +0100 Subject: [PATCH 02/26] arm64/booting: Document boot requirements for FEAT_NMI kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=1ae51a93c0f3b34c9cb85aa29d4113bab5c5c440 ---------------------------------------------------------------------- In order to use FEAT_NMI we must be able to use ALLINT, so require that it behaves as though not trapped when it is present. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- Documentation/arm64/booting.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 7552dbc1cc54..92cf9645c3d1 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -270,6 +270,12 @@ Before jumping into the kernel, the following conditions must be met: having 0b1 set for the corresponding bit for each of the auxiliary counters present. + For CPUs with Non-maskable Interrupts (FEAT_NMI): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.TALLINT must be initialised to 0b0. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. -- Gitee From cedcbe66fac989a9b1b4f537c1b33a74b93867b2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 22:39:28 +0100 Subject: [PATCH 03/26] arm64/sysreg: Add definitions for immediate versions of MSR ALLINT kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=da2bf1e283bda0fa7e16fec08d6935424b4d0ab8 ---------------------------------------------------------------------- Encodings are provided for ALLINT which allow setting of ALLINT.ALLINT using an immediate rather than requiring that a register be loaded with the value to write. Since these don't currently fit within the scheme we have for sysreg generation, add manual encodings like we currently do for other similar registers such as SVCR. Since it is required that these immediate versions be encoded with xzr as the source register, provide asm wrappers which ensure this is the case.
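For illustration, the wrappers added by the diff below take the following shape, issuing the immediate forms with xzr as the (required) source register:

| static __always_inline void _allint_clear(void)
| {
| 	asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr"));
| }
|
| static __always_inline void _allint_set(void)
| {
| 	asm volatile(__msr_s(SYS_ALLINT_SET, "xzr"));
| }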
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/daifflags.h | 1 + arch/arm64/include/asm/nmi.h | 16 +++++++++++++++- arch/arm64/include/asm/sysreg.h | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index cfdde3a56805..80f3eb49da92 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -141,4 +141,5 @@ static inline void local_daif_inherit(struct pt_regs *regs) */ write_sysreg(flags, daif); } + #endif diff --git a/arch/arm64/include/asm/nmi.h b/arch/arm64/include/asm/nmi.h index 4cd14b6af88b..c51b6cb25221 100644 --- a/arch/arm64/include/asm/nmi.h +++ b/arch/arm64/include/asm/nmi.h @@ -1,4 +1,7 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ #ifndef __ASM_NMI_H #define __ASM_NMI_H @@ -14,4 +17,15 @@ void dynamic_ipi_setup(int cpu); void dynamic_ipi_teardown(int cpu); #endif /* !__ASSEMBLER__ */ + +static __always_inline void _allint_clear(void) +{ + asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr")); +} + +static __always_inline void _allint_set(void) +{ + asm volatile(__msr_s(SYS_ALLINT_SET, "xzr")); +} + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9705f7abd428..59824098e006 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -129,6 +129,8 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. */ +#define SYS_ALLINT_CLR sys_reg(0, 1, 4, 0, 0) +#define SYS_ALLINT_SET sys_reg(0, 1, 4, 1, 0) #define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) -- Gitee From 4b85794fc050fa4a9b160953da45fed21095ee65 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 7 Oct 2022 12:17:01 +0100 Subject: [PATCH 04/26] arm64/asm: Introduce assembly macros for managing ALLINT kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=4f8ecfa5830f4605a34d0982937351940147e4ef ---------------------------------------------------------------------- In order to allow assembly code to ensure that not even superpriority interrupts can preempt it, provide macros for enabling and disabling ALLINT.ALLINT. This is not integrated into the existing DAIF macros since we do not always wish to manage ALLINT along with DAIF and the use of DAIF in the naming of the existing macros might lead to surprises if ALLINT is also managed.
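As a purely illustrative sketch (the critical section here is hypothetical; only the macros themselves are added by the diff below), assembly code which must not be preempted even by superpriority interrupts would bracket itself like so:

| 	disable_allint			// set ALLINT.ALLINT, masking superpriority interrupts
| 	// ... code which must not be interrupted, even by an NMI ...
| 	enable_allint			// clear ALLINT.ALLINT again

On CPUs without FEAT_NMI the alternatives framework patches the MSR instructions inside these macros to NOPs, so they are safe to use unconditionally.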
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/assembler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 63db8e5ec9f8..22b93b470503 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -37,6 +37,22 @@ msr daifset, #0xf .endm + .macro disable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_SET, xzr +alternative_else_nop_endif +#endif + .endm + + .macro enable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_CLR, xzr +alternative_else_nop_endif +#endif + .endm + .macro disable_daif msr daifset, #0xf .endm -- Gitee From 0bbb35985617f2130a507e2bc677f47a0331608b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 6 Oct 2022 18:21:35 +0100 Subject: [PATCH 05/26] arm64/hyp-stub: Enable access to ALLINT kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=6a7b89e1878322de4f87351ad382635618f9d03a ---------------------------------------------------------------------- In order to use NMIs we need to ensure that accesses to ALLINT are not trapped, so update HCRX_EL2 to ensure that TALLINT is not set when we detect support for NMIs. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/head.S | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 59824098e006..f78536b32b61 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -363,6 +363,8 @@ #define SYS_PAR_EL1_F BIT(0) #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define HCRX_EL2_TALLINT_MASK GENMASK(6, 6) + /*** Statistical Profiling Extension ***/ #define SMPRI_EL1_PRIORITY_MASK 0xf @@ -1343,6 +1345,7 @@ #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_NMI_SHIFT 36 #define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4704d00d3813..eb25a4250e54 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -557,11 +557,24 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE + mrs x2, id_aa64pfr1_el1 + ubfx x2, x2, #ID_AA64PFR1_NMI_SHIFT, #4 + cbz x2, .Lskip_nmi +.Linit_nmi: + mrs x2, id_aa64mmfr1_el1 + ubfx x2, x2, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x2, .Lskip_nmi + + mrs_s x1, SYS_HCRX_EL2 + bic x1, x1, #HCRX_EL2_TALLINT_MASK // Don't trap ALLINT + msr_s SYS_HCRX_EL2, x1 + /* * Check for VHE being present. For the rest of the EL2 setup, * x2 being non-zero indicates that we do have VHE, and that the * kernel is intended to run at EL2.
*/ +.Lskip_nmi: mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else -- Gitee From 798ac6fa2bad3bc55bbe36f70bca555706a6a309 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 3 Nov 2022 15:50:08 +0000 Subject: [PATCH 06/26] arm64/cpufeature: Detect PE support for FEAT_NMI kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=12b82c9d5754e1e85573fc35d931a7337c9b9658 ---------------------------------------------------------------------- Use of FEAT_NMI requires that all the PEs in the system and the GIC have NMI support. This patch implements the PE part of that detection. In order to avoid problematic interactions between real and pseudo NMIs we disable the architected feature if the user has enabled pseudo NMIs on the command line. If this is done on a system where support for the architected feature is detected then a warning is printed during boot in order to help users spot what is likely to be a misconfiguration. In order to allow KVM to offer the feature to guests even if pseudo NMIs are in use by the host we have a separate feature for the raw feature which is used in KVM. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/cpucaps.h | 2 + arch/arm64/include/asm/cpufeature.h | 6 +++ arch/arm64/include/asm/sysreg.h | 5 +++ arch/arm64/kernel/cpufeature.c | 67 ++++++++++++++++++++++++++++- 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 0da53f0c4fbf..a787035d6aff 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -86,5 +86,7 @@ #define ARM64_WORKAROUND_PHYTIUM_FT3386 78 #define ARM64_NCAPS 80 +#define ARM64_HAS_NMI 81 +#define ARM64_USES_NMI 82 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index dd7d18cfbd1e..b21f13aee50d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -770,6 +770,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static __always_inline bool system_uses_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_NMI) && + cpus_have_const_cap(ARM64_USES_NMI); +} + static inline bool system_supports_mte(void) { return IS_ENABLED(CONFIG_ARM64_MTE) && diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f78536b32b61..1c559da4ff73 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1200,6 +1200,8 @@ #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_SPINTMASK (BIT(62)) +#define SCTLR_EL1_NMI (BIT(61)) #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -1359,6 +1361,9 @@ #define ID_AA64PFR1_BT_BTI 0x1 #define ID_AA64PFR1_SME 1 +#define ID_AA64PFR1_NMI_IMP_DEF 0x1 +#define ID_AA64PFR1_NMI_IMP_NI 0x0 + #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9cb9a209b63a..a845a76f8c6b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include @@ -245,6 
+246,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_NMI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), @@ -2054,9 +2056,11 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ -#ifdef CONFIG_ARM64_PSEUDO_NMI +#if IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) || IS_ENABLED(CONFIG_ARM64_NMI) bool enable_pseudo_nmi; +#endif +#ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) { return strtobool(p, &enable_pseudo_nmi); @@ -2070,6 +2074,41 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, } #endif +#ifdef CONFIG_ARM64_NMI +static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* + * Having both real and pseudo NMIs enabled simultaneously is + * likely to cause confusion. Since pseudo NMIs must be + * enabled with an explicit command line option, if the user + * has set that option on a system with real NMIs for some + * reason assume they know what they're doing. + */ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) { + pr_info("Pseudo NMI enabled, not using architected NMI\n"); + return false; + } + + return true; +} + +static void nmi_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Enable use of NMIs controlled by ALLINT, SPINTMASK should + * be clear by default but make it explicit that we are using + * this mode. Ensure that ALLINT is clear first in order to + * avoid leaving things masked. 
+ */ + _allint_clear(); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + isb(); +} +#endif + #ifdef CONFIG_ARM64_BTI static void bti_enable(const struct arm64_cpu_capabilities *__unused) { @@ -2736,6 +2775,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = fa64_kernel_enable, }, #endif /* CONFIG_ARM64_SME */ +#ifdef CONFIG_ARM64_NMI + { + .desc = "Non-maskable Interrupts present", + .capability = ARM64_HAS_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = has_cpuid_feature, + }, + { + .desc = "Non-maskable Interrupts enabled", + .capability = ARM64_USES_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = use_nmi, + .cpu_enable = nmi_enable, + }, +#endif + #ifdef CONFIG_FAST_SYSCALL { .desc = "Xcall Support", -- Gitee From 23b54d02afb261ea107df42067bd638a38c58413 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 11 Nov 2022 13:35:25 +0000 Subject: [PATCH 07/26] arm64/nmi: Manage masking for superpriority interrupts along with DAIF kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=ce8c28fbdd6cac9405640e5fb50ccacfa3b49ce0 ---------------------------------------------------------------------- As we do for pseudo NMIs, add code to our DAIF management which keeps superpriority interrupts unmasked when we have asynchronous exceptions enabled. Since superpriority interrupts are not masked through DAIF like pseudo NMIs are, we also need to modify the assembler macros for managing DAIF to ensure that the masking is done in the assembly code. At present users of the assembly macros always mask pseudo NMIs. There is a difference to the actual handling between pseudo NMIs and superpriority interrupts in the assembly save_and_disable_irq and restore_irq macros: these cover both interrupts and FIQs using DAIF without regard for the use of pseudo NMIs, so they also mask those, but they are not updated here to mask superpriority interrupts. Given the names it is not clear that the behaviour with pseudo NMIs is particularly intentional, and in any case these macros are only used in the implementation of alternatives for software PAN while hardware PAN has been mandatory since v8.1, so it is not anticipated that practical systems with support for FEAT_NMI will ever execute the affected code. This should be a conservative set of masked regions; we may be able to relax this in future, but this should represent a good starting point.
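As the daifflags.h hunk below shows, the C-level rule is that ALLINT masking follows the A bit on restore; a condensed view of the logic added to local_daif_restore():

| 	/* If we can take asynchronous errors we can take NMIs */
| 	if (system_uses_nmi()) {
| 		if (flags & PSR_A_BIT)
| 			_allint_set();
| 		else
| 			_allint_clear();
| 	}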
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/assembler.h | 2 ++ arch/arm64/include/asm/daifflags.h | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 22b93b470503..7dc2ab36610f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -54,11 +54,13 @@ alternative_else_nop_endif .endm .macro disable_daif + disable_allint msr daifset, #0xf .endm .macro enable_daif msr daifclr, #0xf + enable_allint .endm .macro restore_daif, flags:req diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 80f3eb49da92..f1c3d2f204f8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define DAIF_PROCCTX 0 @@ -35,6 +36,9 @@ static inline void local_daif_mask(void) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + if (system_uses_nmi()) + _allint_set(); + trace_hardirqs_off(); } @@ -116,6 +120,14 @@ static inline void local_daif_restore(unsigned long flags) write_sysreg(flags, daif); + /* If we can take asynchronous errors we can take NMIs */ + if (system_uses_nmi()) { + if (flags & PSR_A_BIT) + _allint_set(); + else + _allint_clear(); + } + if (irq_disabled) trace_hardirqs_off(); } -- Gitee From c90d821eff77dfdde54f7329bcab3ae2308ef69f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 12 Dec 2022 13:43:36 +0000 Subject: [PATCH 08/26] arm64/entry: Don't call preempt_schedule_irq() with NMIs masked kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=6465ab3af5ccc2122dc468ed4c7e9c66864c9f6f ---------------------------------------------------------------------- As we do for pseudo NMIs, don't call preempt_schedule_irq() when architected NMIs are masked. If they are masked then we are calling from a preempting context so skip preemption. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/process.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1c559da4ff73..96de9e7ec3c2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -340,6 +340,8 @@ #define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) #define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) +#define SYS_ALLINT sys_reg(3, 0, 4, 3, 0) + #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) @@ -364,6 +366,7 @@ #define SYS_PAR_EL1_FST GENMASK(6, 1) #define HCRX_EL2_TALLINT_MASK GENMASK(6, 6) +#define ALLINT_ALLINT BIT(13) /*** Statistical Profiling Extension ***/ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 14300c9e06d5..cf6ed56593a1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -737,6 +737,15 @@ core_initcall(tagged_addr_init); asmlinkage void __sched arm64_preempt_schedule_irq(void) { + /* + * Architected NMIs are unmasked prior to handling regular + * IRQs and masked while handling FIQs. If ALLINT is set then + * we are in an NMI or other preempting context, so skip + * preemption.
+ */ + if (system_uses_nmi() && (read_sysreg_s(SYS_ALLINT) & ALLINT_ALLINT)) + return; + lockdep_assert_irqs_disabled(); /* -- Gitee From 5493c19aae47c511b05fadfafff8b46d812d93ae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 Nov 2022 21:13:07 +0000 Subject: [PATCH 09/26] arm64/irq: Document handling of FEAT_NMI in irqflags.h kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=e5f42bd64bfa99569035ee9dec71e7e8e59aa0d6 ---------------------------------------------------------------------- We have documentation at the top of irqflags.h which explains the DAIF masking. Since the additional masking with NMIs is related and also covers the IF in DAIF, extend the comment to note what's going on with NMIs, though none of the code in irqflags.h is updated to handle NMIs. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/irqflags.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..14b2fad48fc9 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -15,12 +15,20 @@ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * With the addition of the FEAT_NMI extension we gain an additional + * class of superpriority IRQ/FIQ which is separately masked with a + * choice of modes controlled by SCTLR_ELn.{SPINTMASK,NMI}. Linux + * sets SPINTMASK to 0 and NMI to 1 which results in ALLINT.ALLINT + * masking both superpriority interrupts and IRQ/FIQ regardless of the + * I and F settings. Since these superpriority interrupts are being + * used as NMIs we do not include them in the interrupt masking here; + * anything that requires that NMIs be masked needs to explicitly do + * so.
*/ /* -- Gitee From 5de9d3919614b532b6c73d91f5f943db82b6ac17 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:38 +0100 Subject: [PATCH 10/26] irqchip/gic-v3: Fix priority mask handling mainline inclusion from mainline-v5.19-rc1 commit 614ab80c96474682157cabb14f8c8602b3422e90 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID47CL CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=614ab80c96474682157cabb14f8c8602b3422e90 ---------------------------------------------------------------------- When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent usage of local_irq_enable() and resulting in softirqs being run with IRQs erroneously masked (possibly resulting in deadlocks). This can happen when an IRQ exception is taken from a context where regular IRQs were unmasked, and either: (1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ being withdrawn since the IRQ exception was taken). (2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged. When an NMI is taken from a context where regular IRQs were masked, there is no problem. When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with perf, e.g. | # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1 | ------------[ cut here ]------------ | WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c | Modules linked in: | CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12 | Hardware name: linux,dummy-virt (DT) | pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : arch_local_irq_enable+0x4c/0x6c | lr : __do_softirq+0x110/0x5d8 | sp : ffff8000080bbbc0 | pmr_save: 000000f0 | x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000 | x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008 | x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20 | x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000 | x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000 | x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294 | x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70 | x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf | x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001 | x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0 | Call trace: | arch_local_irq_enable+0x4c/0x6c | __irq_exit_rcu+0x180/0x1ac | irq_exit_rcu+0x1c/0x44 | el1_interrupt+0x4c/0xe4 | el1h_64_irq_handler+0x18/0x24 | el1h_64_irq+0x74/0x78 | smpboot_thread_fn+0x68/0x2c0 | kthread+0x124/0x130 | ret_from_fork+0x10/0x20 | irq event stamp: 193241 | hardirqs last enabled at (193240): [] __do_softirq+0x10c/0x5d8 | hardirqs last disabled at (193241): [] el1_dbg+0x24/0x90 | softirqs last enabled at (193234): [] __do_softirq+0x470/0x5d8 | softirqs last disabled at (193239): [] __irq_exit_rcu+0x180/0x1ac | ---[ end trace 0000000000000000 ]--- The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the interrupted context, but the structure of gic_handle_irq() makes this also depend on whether the GIC reports an IRQ, NMI, or special INTID: * When the interrupted context had regular IRQs masked (and hence the interrupt must be an NMI), the entry code performs the NMI entry/exit and gic_handle_irq() 
should return with DAIF and ICC_PMR_EL1 unchanged. This is handled correctly today. * When the interrupted context had regular IRQs unmasked, the entry code performs IRQ entry/exit, but expects gic_handle_irq() to always update ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to returning (which it must do prior to invoking any regular IRQ handler). This unbalanced calling convention is necessary because we don't know whether an NMI has been taken until acknowledged by a read from ICC_IAR1_EL1, and so we need to perform the read with NMI masked in case an NMI has been taken (and needs to be handled with NMIs masked). Unfortunately, this is not handled consistently: - When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns immediately without manipulating ICC_PMR_EL1 and DAIF. - When RPR_EL1 indicates an NMI, gic_handle_irq() calls gic_handle_nmi() to invoke the NMI handler, then returns without manipulating ICC_PMR_EL1 and DAIF. - For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF prior to invoking the IRQ handler. There were related problems with special INTID handling in the past, where if an exception was taken from a context with regular IRQs masked and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously unmask NMIs in NMI context, permitting an unexpected nested NMI. That case specifically was fixed by commit: a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups") ... but unfortunately that commit added an inverse problem, where if an exception was taken from a context with regular IRQs *unmasked* and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously fail to unmask NMIs (and consequently regular IRQs could not be unmasked during softirq processing). Before and after that commit, if an NMI was taken from a context with regular IRQs unmasked gic_handle_irq() would not unmask NMIs prior to returning, leading to the same problem with softirq handling. This patch fixes this by restructuring gic_handle_irq(), splitting it into separate irqson/irqsoff helper functions which consistently perform the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context, regardless of the event indicated by ICC_IAR1_EL1. The special INTID handling is moved into the low-level IRQ/NMI handler invocation helper functions, so that early returns don't prevent the required manipulation of DAIF + ICC_PMR_EL1. Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com Signed-off-by: Shi Yang --- drivers/irqchip/irq-gic-v3.c | 133 ++++++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 34 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 75447d091a23..71b28c268c70 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -660,28 +660,12 @@ static inline void gic_complete_ack(u32 irqnr) isb(); } -static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +static bool gic_rpr_is_nmi_prio(void) { - bool irqs_enabled = interrupts_enabled(regs); - int err; - - if (irqs_enabled) - nmi_enter(); - - gic_complete_ack(irqnr); - - /* - * Leave the PSR.I bit set to prevent other NMIs to be - * received while handling this one. - * PSR.I will be restored when we ERET to the - * interrupted context.
- */ - err = handle_domain_nmi(gic_data.domain, irqnr, regs); - if (err) - gic_deactivate_unhandled(irqnr); + if (!gic_supports_nmi()) + return false; - if (irqs_enabled) - nmi_exit(); + return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); } static u32 do_read_iar(struct pt_regs *regs) @@ -718,20 +702,61 @@ static u32 do_read_iar(struct pt_regs *regs) return iar; } -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +static bool gic_irqnr_is_special(u32 irqnr) { - u32 irqnr; + return irqnr >= 1020 && irqnr <= 1023; +} - irqnr = do_read_iar(regs); +static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) + return; - /* Check for special IDs first */ - if ((irqnr >= 1020 && irqnr <= 1023)) - return; + gic_complete_ack(irqnr); + + if (handle_domain_irq(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); + } +} - if (gic_supports_nmi() && - unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) { - gic_handle_nmi(irqnr, regs); +static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) return; + + gic_complete_ack(irqnr); + + if (handle_domain_nmi(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr); + gic_deactivate_unhandled(irqnr); + } +} + +/* + * An exception has been taken from a context with IRQs enabled, and this could + * be an IRQ or an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear + * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning, + * after handling any NMI but before handling any IRQ. + * + * The entry code has performed IRQ entry, and if an NMI is detected we must + * perform NMI entry/exit around invoking the handler. + */ +static void __gic_handle_irq_from_irqson(struct pt_regs *regs) +{ + bool is_nmi; + u32 irqnr; + + irqnr = gic_read_iar(); + + is_nmi = gic_rpr_is_nmi_prio(); + + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); } if (gic_prio_masking_enabled()) { @@ -739,12 +764,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - gic_complete_ack(irqnr); + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} - if (handle_domain_irq(gic_data.domain, irqnr, regs)) { - WARN_ONCE(true, "Unexpected interrupt received!\n"); - gic_deactivate_unhandled(irqnr); - } +/* + * An exception has been taken from a context with IRQs disabled, which can only + * be an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave + * DAIF.IF (and ICC_PMR_EL1) unchanged. + * + * The entry code has performed NMI entry. + */ +static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) +{ + u64 pmr; + u32 irqnr; + + /* + * We were in a context with IRQs disabled. However, the + * entry code has set PMR to a value that allows any + * interrupt to be acknowledged, and not just NMIs. This can + * lead to surprising effects if the NMI has been retired in + * the meantime, and that there is an IRQ pending. The IRQ + * would then be taken in NMI context, something that nobody + * wants to debug twice. + * + * Until we sort this, drop PMR again to a level that will + * actually only allow NMIs before reading IAR, and then + * restore it to what it was. 
+ */ + pmr = gic_read_pmr(); + gic_pmr_mask_irqs(); + isb(); + irqnr = gic_read_iar(); + gic_write_pmr(pmr); + + __gic_handle_nmi(irqnr, regs); +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + __gic_handle_irq_from_irqsoff(regs); + else + __gic_handle_irq_from_irqson(regs); } #ifdef CONFIG_FAST_IRQ -- Gitee From fe36b636feb8320ae1554694670c9837436703fb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 18:53:47 +0100 Subject: [PATCH 11/26] arm64/nmi: Add handling of superpriority interrupts as NMIs kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=17080280284600f96b5a613898e5c3e698843e33 ---------------------------------------------------------------------- Our goal with superpriority interrupts is to use them as NMIs, taking advantage of the much smaller regions where they are masked to allow prompt handling of the most time critical interrupts. When an interrupt is configured with superpriority we will enter EL1 as normal for any interrupt; the presence of a superpriority interrupt is indicated with a status bit in ISR_EL1. We use this to check for the presence of a superpriority interrupt before we unmask anything in elX_interrupt(), reporting it without unmasking any interrupts. If no superpriority interrupt is present then we handle normal interrupts as normal; superpriority interrupts will be unmasked while doing so as a result of setting DAIF_PROCCTX. Both IRQs and FIQs may be configured with superpriority, so we handle both, passing an additional root handler into the elX_interrupt() function along with the mask for the bit in ISR_EL1 which indicates the presence of the relevant kind of superpriority interrupt. These root handlers can be configured by the interrupt controller similarly to the root handlers for normal interrupts using the newly added set_handle_nmi_irq() and set_handle_nmi_fiq() functions.
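The core of the detection is a check of the superpriority status bit in ISR_EL1 before anything is unmasked; the GIC entry path in the diff below gains roughly this shape:

| 	/* Is there an NMI to handle? */
| 	if (system_uses_nmi() && (read_sysreg(isr_el1) & ISR_EL1_IS)) {
| 		__el1_nmi(regs);
| 		return;
| 	}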
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/sysreg.h | 2 ++ drivers/irqchip/irq-gic-v3.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 96de9e7ec3c2..917d4ca495b1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -343,6 +343,7 @@ #define SYS_ALLINT sys_reg(3, 0, 4, 3, 0) #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define SYS_ICC_NMIAR1_EL1 sys_reg(3, 0, 12, 9, 5) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) #define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) @@ -367,6 +368,7 @@ #define HCRX_EL2_TALLINT_MASK GENMASK(6, 6) #define ALLINT_ALLINT BIT(13) +#define ISR_EL1_IS BIT(10) /*** Statistical Profiling Extension ***/ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 71b28c268c70..0a6e3a1d4af1 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -804,8 +804,38 @@ static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) __gic_handle_nmi(irqnr, regs); } +#ifdef CONFIG_ARM64 +static inline u64 gic_read_nmiar(void) +{ + u64 irqstat; + + irqstat = read_sysreg_s(SYS_ICC_NMIAR1_EL1); + + dsb(sy); + + return irqstat; +} + +static __always_inline void __el1_nmi(struct pt_regs *regs) +{ + u32 irqnr = gic_read_nmiar(); + + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); +} +#endif + static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { +#ifdef CONFIG_ARM64 + /* Is there an NMI to handle? */ + if (system_uses_nmi() && (read_sysreg(isr_el1) & ISR_EL1_IS)) { + __el1_nmi(regs); + return; + } +#endif + if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) __gic_handle_irq_from_irqsoff(regs); else -- Gitee From eeea3ccef670caeef6e8d3c9ef2c70cc28577960 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 12:57:00 +0100 Subject: [PATCH 12/26] arm64/nmi: Add Kconfig for NMI kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=316c093cb6a4d529a7ee2fe922d2328ead089516 ---------------------------------------------------------------------- Since NMI handling is in some fairly hot paths we provide a Kconfig option which allows support to be compiled out when not needed. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/Kconfig | 17 +++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f1accb7355f..70d69cb28864 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1990,6 +1990,23 @@ config ARM64_EPAN if the cpu does not implement the feature. endmenu +menu "ARMv8.8 architectural features" + +config ARM64_NMI + bool "Enable support for Non-maskable Interrupts (NMI)" + default y + help + Non-maskable interrupts are an architecture and GIC feature + which allow the system to configure some interrupts to have + superpriority, allowing them to be handled + before other interrupts and masked for shorter periods of time. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. It will also be + disabled if pseudo NMIs are enabled at runtime.
+ +endmenu # "ARMv8.8 architectural features" + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y -- Gitee From cc9589566184ea820a7df08d8adbfdc32ec8a6b1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 13 Oct 2022 16:05:33 +0200 Subject: [PATCH 13/26] irqchip/gic-v3: Implement FEAT_GICv3_NMI support kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/commit/?h=arm64-nmi&id=a9952323528691e1dd56a01e507ec53fc1053f86 ---------------------------------------------------------------------- The FEAT_GICv3_NMI GIC feature coupled with the CPU FEAT_NMI enables handling NMI interrupts in HW on aarch64, by adding a superpriority interrupt to the existing GIC priority scheme. Implement GIC driver support for the FEAT_GICv3_NMI feature. Rename the gic_supports_nmi() helper function to gic_supports_pseudo_nmis() to make the pseudo-NMI code path clearer and more explicit. Check, through the ARM64 capability infrastructure, if support for FEAT_NMI was detected on the core and the system has not overridden the detection and forced pseudo-NMI enablement. If FEAT_NMI is detected, it was not overridden (check embedded in the system_uses_nmi() call) and the GIC supports the FEAT_GICv3_NMI feature, install an NMI handler and initialize NMI-related HW GIC registers. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- drivers/irqchip/irq-gic-v3.c | 131 ++++++++++++++++++----------- include/linux/irqchip/arm-gic-v3.h | 4 + 2 files changed, 85 insertions(+), 50 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 0a6e3a1d4af1..c5d482b8429a 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -61,6 +61,7 @@ struct gic_chip_data { u32 nr_redist_regions; u64 flags; bool has_rss; + bool has_nmi; unsigned int ppi_nr; struct partition_desc **ppi_descs; }; @@ -142,6 +143,20 @@ static DEFINE_PER_CPU(bool, has_rss); /* Our default, arbitrary priority value. Linux only uses one anyway. */ #define DEFAULT_PMR_VALUE 0xf0 +#ifdef CONFIG_ARM64 +#include + +static inline bool has_v3_3_nmi(void) +{ + return gic_data.has_nmi && system_uses_nmi(); +} +#else +static inline bool has_v3_3_nmi(void) +{ + return false; +} +#endif + phys_addr_t get_gicr_paddr(int cpu) { return (per_cpu_ptr(gic_data.rdists.rdist, cpu))->phys_base; @@ -334,6 +349,42 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask); } +static DEFINE_RAW_SPINLOCK(irq_controller_lock); + +static void gic_irq_configure_nmi(struct irq_data *d, bool enable) +{ + void __iomem *base, *addr; + u32 offset, index, mask, val; + + offset = convert_offset_index(d, GICD_INMIR, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else + base = gic_data.dist_base; + + addr = base + offset + (index / 32) * 4; + + raw_spin_lock(&irq_controller_lock); + + val = readl_relaxed(addr); + val = enable ?
(val | mask) : (val & ~mask); + writel_relaxed(val, addr); + + raw_spin_unlock(&irq_controller_lock); +} + +static void gic_irq_enable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, true); +} + +static void gic_irq_disable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, false); +} + static void gic_poke_irq(struct irq_data *d, u32 offset) { void (*rwp_wait)(void); @@ -380,6 +431,12 @@ static void gic_unmask_irq(struct irq_data *d) gic_poke_irq(d, GICD_ISENABLER); } +static inline bool gic_supports_pseudo_nmis(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + static_branch_likely(&supports_pseudo_nmis); +} + static int gic_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { @@ -462,7 +519,7 @@ static int gic_irq_nmi_setup(struct irq_data *d) struct irq_desc *desc = irq_to_desc(d->irq); u32 idx; - if (!gic_supports_nmi()) + if (!gic_supports_pseudo_nmis() && !has_v3_3_nmi()) return -EINVAL; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -496,7 +553,10 @@ static int gic_irq_nmi_setup(struct irq_data *d) break; } - gic_irq_set_prio(d, GICD_INT_NMI_PRI); + if (has_v3_3_nmi()) + gic_irq_enable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_NMI_PRI); return 0; } @@ -506,7 +566,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d) struct irq_desc *desc = irq_to_desc(d->irq); u32 idx; - if (WARN_ON(!gic_supports_nmi())) + if (WARN_ON(!gic_supports_pseudo_nmis() && !has_v3_3_nmi())) return; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -538,7 +598,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d) break; } - gic_irq_set_prio(d, GICD_INT_DEF_PRI); + if (has_v3_3_nmi()) + gic_irq_disable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_DEF_PRI); } static void gic_eoi_irq(struct irq_data *d) @@ -662,7 +725,7 @@ static inline void gic_complete_ack(u32 irqnr) static bool gic_rpr_is_nmi_prio(void) { - if (!gic_supports_nmi()) + if (!gic_supports_pseudo_nmis()) return false; return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); @@ -836,7 +899,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs } #endif - if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + if (unlikely(gic_supports_pseudo_nmis() && !interrupts_enabled(regs))) __gic_handle_irq_from_irqsoff(regs); else __gic_handle_irq_from_irqson(regs); @@ -1253,7 +1316,7 @@ static void gic_cpu_sys_reg_init(void) /* Set priority mask register */ if (!gic_prio_masking_enabled()) { write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); - } else if (gic_supports_nmi()) { + } else if (gic_supports_pseudo_nmis()) { /* * Mismatch configuration with boot CPU, the system is likely * to die as interrupt masking will not work properly on all @@ -2035,11 +2098,16 @@ static const struct gic_quirk gic_quirks[] = { } }; +static void gic_enable_pseudo_nmis(void) +{ + static_branch_enable(&supports_pseudo_nmis); +} + static void gic_enable_nmi_support(void) { int i; - if (!gic_prio_masking_enabled()) + if (!gic_prio_masking_enabled() && !has_v3_3_nmi()) return; if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { @@ -2055,47 +2123,11 @@ static void gic_enable_nmi_support(void) refcount_set(&ppi_nmi_refs[i], 0); /* - * Linux itself doesn't use 1:N distribution, so has no need to - * set PMHE. The only reason to have it set is if EL3 requires it - * (and we can't change it). 
- */ - if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) - static_branch_enable(&gic_pmr_sync); - - pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", - static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed"); - - /* - * How priority values are used by the GIC depends on two things: - * the security state of the GIC (controlled by the GICD_CTRL.DS bit) - * and if Group 0 interrupts can be delivered to Linux in the non-secure - * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the - * the ICC_PMR_EL1 register and the priority that software assigns to - * interrupts: - * - * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority - * ----------------------------------------------------------- - * 1 | - | unchanged | unchanged - * ----------------------------------------------------------- - * 0 | 1 | non-secure | non-secure - * ----------------------------------------------------------- - * 0 | 0 | unchanged | non-secure - * - * where non-secure means that the value is right-shifted by one and the - * MSB bit set, to make it fit in the non-secure priority range. - * - * In the first two cases, where ICC_PMR_EL1 and the interrupt priority - * are both either modified or unchanged, we can use the same set of - * priorities. - * - * In the last case, where only the interrupt priorities are modified to - * be in the non-secure range, we use a different PMR value to mask IRQs - * and the rest of the values that we use remain unchanged. + * Initialize pseudo-NMIs only if GIC driver cannot take advantage + * of core (FEAT_NMI) and GIC (FEAT_GICv3_NMI) in HW */ - if (gic_has_group0() && !gic_dist_security_disabled()) - static_branch_enable(&gic_nonsecure_priorities); - - static_branch_enable(&supports_pseudo_nmis); + if (!has_v3_3_nmi()) + gic_enable_pseudo_nmis(); if (static_branch_likely(&supports_deactivate_key)) gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; @@ -2161,8 +2193,7 @@ static int __init gic_init_bases(void __iomem *dist_base, irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); gic_data.has_rss = !!(typer & GICD_TYPER_RSS); - pr_info("Distributor has %sRange Selector support\n", - gic_data.has_rss ? 
"" : "no "); + gic_data.has_nmi = !!(typer & GICD_TYPER_NMI); if (typer & GICD_TYPER_MBIS) { err = mbi_init(handle, gic_data.domain); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b8b723cdaaf0..dd4db4ec89db 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -30,6 +30,7 @@ #define GICD_ICFGR 0x0C00 #define GICD_IGRPMODR 0x0D00 #define GICD_NSACR 0x0E00 +#define GICD_INMIR 0x0F80 #define GICD_IGROUPRnE 0x1000 #define GICD_ISENABLERnE 0x1200 #define GICD_ICENABLERnE 0x1400 @@ -39,6 +40,7 @@ #define GICD_ICACTIVERnE 0x1C00 #define GICD_IPRIORITYRnE 0x2000 #define GICD_ICFGRnE 0x3000 +#define GICD_INMIRnE 0x3B00 #define GICD_IROUTER 0x6000 #define GICD_IROUTERnE 0x8000 #define GICD_IDREGS 0xFFD0 @@ -85,6 +87,7 @@ #define GICD_TYPER_LPIS (1U << 17) #define GICD_TYPER_MBIS (1U << 16) #define GICD_TYPER_ESPI (1U << 8) +#define GICD_TYPER_NMI (1U << 9) #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) #define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) @@ -238,6 +241,7 @@ #define GICR_ICFGR0 GICD_ICFGR #define GICR_IGRPMODR0 GICD_IGRPMODR #define GICR_NSACR GICD_NSACR +#define GICR_INMIR0 GICD_INMIR #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) -- Gitee From 6dd354df8f8db705d59ab21a4040b37dd5b1b9f6 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Fri, 19 May 2023 10:18:40 -0700 Subject: [PATCH 14/26] watchdog/perf: adapt the watchdog_perf interface for async model mainline inclusion from mainline-v6.5-rc1 commit 930d8f8dbab97cb05dba30e67a2dfa0c6dbf4bc7 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID47CL CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=930d8f8dbab97cb05dba30e67a2dfa0c6dbf4bc7 ---------------------------------------------------------------------- When lockup_detector_init()->watchdog_hardlockup_probe(), PMU may be not ready yet. E.g. on arm64, PMU is not ready until device_initcall(armv8_pmu_driver_init). And it is deeply integrated with the driver model and cpuhp. Hence it is hard to push this initialization before smp_init(). But it is easy to take an opposite approach and try to initialize the watchdog once again later. The delayed probe is called using workqueues. It need to allocate memory and must be proceed in a normal context. The delayed probe is able to use if watchdog_hardlockup_probe() returns non-zero which means the return code returned when PMU is not ready yet. Provide an API - lockup_detector_retry_init() for anyone who needs to delayed init lockup detector if they had ever failed at lockup_detector_init(). The original assumption is: nobody should use delayed probe after lockup_detector_check() which has __init attribute. That is, anyone uses this API must call between lockup_detector_init() and lockup_detector_check(), and the caller must have __init attribute Link: https://lkml.kernel.org/r/20230519101840.v5.16.If4ad5dd5d09fb1309cebf8bcead4b6a5a7758ca7@changeid Reviewed-by: Petr Mladek Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Signed-off-by: Douglas Anderson Suggested-by: Petr Mladek Cc: Andi Kleen Cc: Catalin Marinas Cc: Chen-Yu Tsai Cc: Christophe Leroy Cc: Colin Cross Cc: Daniel Thompson Cc: "David S. Miller" Cc: Guenter Roeck Cc: Ian Rogers Cc: Marc Zyngier Cc: Mark Rutland Cc: Masayoshi Mizuma Cc: Matthias Kaehlcke Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Randy Dunlap Cc: "Ravi V. 
Shankar" Cc: Ricardo Neri Cc: Stephane Eranian Cc: Stephen Boyd Cc: Sumit Garg Cc: Tzung-Bi Shih Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Shi Yang --- include/linux/nmi.h | 2 ++ kernel/watchdog.c | 67 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index ae9dbedb9849..cc32f888314e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,6 +13,7 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); bool is_hardlockup(void); @@ -36,6 +37,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } +static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 88be068e9922..f924ab981a57 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -116,7 +116,13 @@ void __weak watchdog_nmi_disable(unsigned int cpu) hardlockup_detector_perf_disable(); } -/* Return 0, if a NMI watchdog is available. Error code otherwise */ +/* + * Watchdog-detector specific API. + * + * Return 0 when hardlockup watchdog is available, negative value otherwise. + * Note that the negative value means that a delayed probe might + * succeed later. + */ int __weak __init watchdog_nmi_probe(void) { return hardlockup_detector_perf_init(); @@ -795,6 +801,62 @@ void __weak watchdog_ops_init(void) { } +static void __init lockup_detector_delay_init(struct work_struct *work); +static bool allow_lockup_detector_init_retry __initdata; + +static struct work_struct detector_work __initdata = + __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); + +static void __init lockup_detector_delay_init(struct work_struct *work) +{ + int ret; + + ret = watchdog_nmi_probe(); + if (ret) { + pr_info("Delayed init of the lockup detector failed: %d\n", ret); + pr_info("Hard watchdog permanently disabled\n"); + return; + } + + allow_lockup_detector_init_retry = false; + + nmi_watchdog_available = true; + lockup_detector_setup(); +} + +/* + * lockup_detector_retry_init - retry init lockup detector if possible. + * + * Retry hardlockup detector init. It is useful when it requires some + * functionality that has to be initialized later on a particular + * platform. + */ +void __init lockup_detector_retry_init(void) +{ + /* Must be called before late init calls */ + if (!allow_lockup_detector_init_retry) + return; + + schedule_work(&detector_work); +} + +/* + * Ensure that optional delayed hardlockup init is proceed before + * the init code and memory is freed. + */ +static int __init lockup_detector_check(void) +{ + /* Prevent any later retry. */ + allow_lockup_detector_init_retry = false; + + /* Make sure no work is pending. 
*/ + flush_work(&detector_work); + + return 0; + +} +late_initcall_sync(lockup_detector_check); + void __init lockup_detector_init(void) { watchdog_ops_init(); @@ -807,6 +869,9 @@ void __init lockup_detector_init(void) if (!nmi_watchdog_ops.watchdog_nmi_probe()) nmi_watchdog_available = true; + else + allow_lockup_detector_init_retry = true; + lockup_detector_setup(); #ifdef CONFIG_CORELOCKUP_DETECTOR if (enable_corelockup_detector) -- Gitee From 51af58e573c2dbe188b39f1aced94d0e04ed1ed9 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Fri, 19 May 2023 10:18:41 -0700 Subject: [PATCH 15/26] arm64: add hw_nmi_get_sample_period for preparation of lockup detector mainline inclusion from mainline-v6.5-rc1 commit 94946f9eaac116f2943ec79ec3df1ec2fc92ae07 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID47CL CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=94946f9eaac116f2943ec79ec3df1ec2fc92ae07 ---------------------------------------------------------------------- Set safe maximum CPU frequency to 5 GHz in case a particular platform doesn't implement cpufreq driver. Although, architecture doesn't put any restrictions on maximum frequency but 5 GHz seems to be safe maximum given the available Arm CPUs in the market which are clocked much less than 5 GHz. On the other hand, we can't make it much higher as it would lead to a large hard-lockup detection timeout on parts which are running slower (eg. 1GHz on Developerbox) and doesn't possess a cpufreq driver. Link: https://lkml.kernel.org/r/20230519101840.v5.17.Ia9d02578e89c3f44d3cb12eec8b0176603c8ab2f@changeid Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Signed-off-by: Douglas Anderson Cc: Andi Kleen Cc: Catalin Marinas Cc: Chen-Yu Tsai Cc: Christophe Leroy Cc: Colin Cross Cc: Daniel Thompson Cc: "David S. Miller" Cc: Guenter Roeck Cc: Ian Rogers Cc: Marc Zyngier Cc: Mark Rutland Cc: Masayoshi Mizuma Cc: Matthias Kaehlcke Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Petr Mladek Cc: Randy Dunlap Cc: "Ravi V. Shankar" Cc: Ricardo Neri Cc: Stephane Eranian Cc: Stephen Boyd Cc: Tzung-Bi Shih Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Shi Yang --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/watchdog_hld.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 arch/arm64/kernel/watchdog_hld.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1b635613df55..a5fb42a6c8d7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c new file mode 100644 index 000000000000..3dc9f4cc5ae9 --- /dev/null +++ b/arch/arm64/kernel/watchdog_hld.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * cpufreq driver. 
Although, architecture doesn't put any restrictions on + * maximum frequency but 5 GHz seems to be safe maximum given the available + * Arm CPUs in the market which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which are running slower (eg. 1GHz on + * Developerbox) and doesn't possess a cpufreq driver. + */ +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz -- Gitee From 5ceb64de2528b8cd53405e1f3f9cd4bf68e09e86 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 19 May 2023 10:18:42 -0700 Subject: [PATCH 16/26] arm64: enable perf events based hard lockup detector mainline inclusion from mainline-v6.5-rc1 commit d7a0fe9ef6d6484fca4ba55c19091932337d4272 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID47CL CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d7a0fe9ef6d6484fca4ba55c19091932337d4272 ---------------------------------------------------------------------- With the recent feature added to enable perf events to use pseudo NMIs as interrupts on platforms which support GICv3 or later, it has now become possible to enable the hard lockup detector (or NMI watchdog) on arm64 platforms. So enable the corresponding support. One thing to note here is that the lockup detector is normally initialized just after the early initcalls, but the PMU on arm64 comes up much later as a device_initcall(). To cope with that, override arch_perf_nmi_is_available() to let the watchdog framework know the PMU is not ready, and inform the framework to re-initialize lockup detection once the PMU has been initialized. [dianders@chromium.org: only HAVE_HARDLOCKUP_DETECTOR_PERF if the PMU config is enabled] Link: https://lkml.kernel.org/r/20230523073952.1.I60217a63acc35621e13f10be16c0cd7c363caf8c@changeid Link: https://lkml.kernel.org/r/20230519101840.v5.18.Ia44852044cdcb074f387e80df6b45e892965d4a1@changeid Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Signed-off-by: Douglas Anderson Cc: Andi Kleen Cc: Catalin Marinas Cc: Chen-Yu Tsai Cc: Christophe Leroy Cc: Colin Cross Cc: Daniel Thompson Cc: "David S. Miller" Cc: Guenter Roeck Cc: Ian Rogers Cc: Marc Zyngier Cc: Mark Rutland Cc: Masayoshi Mizuma Cc: Matthias Kaehlcke Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Petr Mladek Cc: Randy Dunlap Cc: "Ravi V.
Shankar" Cc: Ricardo Neri Cc: Stephane Eranian Cc: Stephen Boyd Cc: Tzung-Bi Shih Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Shi Yang --- arch/arm64/Kconfig | 3 +++ arch/arm64/kernel/perf_event.c | 12 ++++++++++-- arch/arm64/kernel/watchdog_hld.c | 12 ++++++++++++ drivers/perf/arm_pmu.c | 5 +++++ include/linux/perf/arm_pmu.h | 3 +++ 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 70d69cb28864..71f238c559a3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -182,11 +182,14 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ + HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cf0750d1d28a..846c4f98ea3a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -1340,10 +1341,17 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { + int ret; + if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); + ret = platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_init); + ret = arm_pmu_acpi_probe(armv8_pmuv3_init); + + if (!ret) + lockup_detector_retry_init(); + + return ret; } device_initcall(armv8_pmu_driver_init) diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index 3dc9f4cc5ae9..678d733ca976 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include +#include /* * Safe maximum CPU frequency in case a particular platform doesn't implement @@ -11,3 +13,13 @@ * Developerbox) and doesn't possess a cpufreq driver. */ #define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz + +bool __init arch_perf_nmi_is_available(void) +{ + /* + * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off, + * however, the pmu interrupts will act like a normal interrupt instead of + * NMI and the hardlockup detector would be broken. + */ + return arm_pmu_irq_is_nmi(); +} diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 743f52d94d92..ad679eed316f 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -755,6 +755,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } +bool arm_pmu_irq_is_nmi(void) +{ + return has_nmi; +} + /* * PMU hardware loses all context when a CPU goes offline. 
* When a CPU is hotplugged back in, since some hardware registers are diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index c7a35d321272..2b9e7f04fcc2 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -192,6 +192,9 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #ifdef CONFIG_CVM_HOST void arm_pmu_set_phys_irq(bool enable); #endif + +bool arm_pmu_irq_is_nmi(void); + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- Gitee From b7fabb512d96a124a0cdfc37ca33e7140e39d938 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 29 Mar 2024 16:44:23 +0800 Subject: [PATCH 17/26] arm64: Enable hardware NMI for perf events NMI hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9CQGL -------------------------------- Like pseudo NMI, also select HAVE_PERF_EVENTS_NMI for hardware NMI, and update the comment for arch_perf_nmi_is_available(). Signed-off-by: Jinjie Ruan Signed-off-by: Shi Yang --- arch/arm64/Kconfig | 2 +- arch/arm64/kernel/watchdog_hld.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 71f238c559a3..7c4fed0be9f3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -189,7 +189,7 @@ config ARM64 select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS - select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI || ARM64_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index 678d733ca976..c4dd73dae922 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -17,9 +17,10 @@ bool __init arch_perf_nmi_is_available(void) { /* - * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off, - * however, the pmu interrupts will act like a normal interrupt instead of - * NMI and the hardlockup detector would be broken. + * hardlockup_detector_perf_init() will success even if Pseudo-NMI or + * Hardware NMI turns off. However, the pmu interrupts will act like + * a normal interrupt instead of NMI and the hardlockup detector would + * be broken. */ return arm_pmu_irq_is_nmi(); } -- Gitee From 3152d6612f1e3984c6f162fb5c1a1b0c64faaac3 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Mon, 19 Feb 2024 09:34:33 +0800 Subject: [PATCH 18/26] config: enable CONFIG_ARM64_NMI and CONFIG_HARDLOCKUP_DETECTOR_PERF for arm64 kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA ---------------------------------------------------------------------- Set CONFIG_ARM64_NMI=y and CONFIG_HARDLOCKUP_DETECTOR_PERF=y in arm64 openeuler_defconfig. 
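For orientation, the Kconfig dependency chain behind these two switches, as wired up earlier in this series (a condensed sketch, not a literal Kconfig excerpt):

|   ARM64_PSEUDO_NMI || ARM64_NMI
|       -> select HAVE_PERF_EVENTS_NMI                     (patch 17)
|   PERF_EVENTS && HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
|       -> select HAVE_HARDLOCKUP_DETECTOR_PERF            (patch 16)
|   HAVE_HARDLOCKUP_DETECTOR_PERF
|       -> makes CONFIG_HARDLOCKUP_DETECTOR_PERF=y selectable (this change)

Note that the next patch in the series flips CONFIG_HARDLOCKUP_DETECTOR_PERF back off in the same defconfig in favour of the SDEI watchdog.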
Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/configs/openeuler_defconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index f9d2080e7399..94144befa72a 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -548,6 +548,12 @@ CONFIG_ARM64_TWED=y CONFIG_ARM64_EPAN=y # end of ARMv8.7 architectural features +# +# ARMv8.8 architectural features +# +CONFIG_ARM64_NMI=y +# end of ARMv8.8 architectural features + CONFIG_ARM64_SVE=y CONFIG_ARM64_SME=y CONFIG_ARM64_MODULE_PLTS=y -- Gitee From 6f002b2fab54e92bcf75a7fb3951a3ad6902c3c9 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 5 Dec 2023 10:17:30 +0800 Subject: [PATCH 19/26] openeuler_defconfig: Enable SDEI Watchdog hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LQCC CVE: NA ------------------------------------------------- Enable SDEI Watchdog for ARM64. Signed-off-by: Xiongfeng Wang Signed-off-by: Shi Yang --- arch/arm64/configs/openeuler_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 94144befa72a..39d737505f42 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -7317,7 +7317,7 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y +# CONFIG_HARDLOCKUP_DETECTOR_PERF is not set # # ARM64 NMI watchdog configuration @@ -7330,6 +7330,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_CORELOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=1 +CONFIG_SDEI_WATCHDOG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -- Gitee From 3d3bfec699889d2ebfa4a9b6c2e4a237a13bb153 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 5 Dec 2023 10:17:28 +0800 Subject: [PATCH 20/26] init: only move down lockup_detector_init() when sdei_watchdog is enabled hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8LQCC CVE: NA ------------------------------------------------- When I enabled CONFIG_DEBUG_PREEMPT and CONFIG_PREEMPT on X86, I got the following Call Trace: [ 3.341853] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 [ 3.344392] caller is debug_smp_processor_id+0x17/0x20 [ 3.344395] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #398 [ 3.344397] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [ 3.344399] Call Trace: [ 3.344410] dump_stack+0x60/0x76 [ 3.344412] check_preemption_disabled+0xba/0xc0 [ 3.344415] debug_smp_processor_id+0x17/0x20 [ 3.344422] hardlockup_detector_event_create+0xf/0x60 [ 3.344427] hardlockup_detector_perf_init+0xf/0x41 [ 3.344430] watchdog_nmi_probe+0xe/0x10 [ 3.344432] lockup_detector_init+0x22/0x5b [ 3.344437] kernel_init_freeable+0x20c/0x245 [ 3.344439] ? rest_init+0xd0/0xd0 [ 3.344441] kernel_init+0xe/0x110 [ 3.344446] ret_from_fork+0x22/0x30 This is because sched_init_smp() sets 'current->nr_cpus_allowed' to the number of possible CPUs, so check_preemption_disabled() fails. This issue was introduced by commit a79050434b45, which moved lockup_detector_init() down after do_basic_setup().
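To make the splat above concrete, here is a minimal sketch, not taken from the patch (the function name is illustrative), of the pattern that trips CONFIG_DEBUG_PREEMPT once the scheduler is fully up:

| #include <linux/smp.h>
|
| /* Illustrative only: after sched_init_smp(), kernel_init runs fully
|  * preemptible and may migrate between CPUs, so a bare smp_processor_id()
|  * is flagged by check_preemption_disabled().
|  */
| static int example_event_create(void)
| {
| 	int cpu;
|
| 	cpu = smp_processor_id();	/* preemptible context: fires the splat */
| 	(void)cpu;
|
| 	cpu = get_cpu();		/* preemption disabled: the safe variant */
| 	/* ... per-CPU setup work ... */
| 	put_cpu();
|
| 	return 0;
| }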
Fix it by moving lockup_detector_init() to its original place when sdei_watchdog is disabled. There is no problem when sdei_watchdog is enabled because watchdog_nmi_probe() is overridden in 'arch/arm64/kernel/watchdog_sdei.c' in this case. Fixes: a79050434b45 ("lockup_detector: init lockup detector after all the init_calls") Signed-off-by: Xiongfeng Wang Reviewed-by: Wei Li Signed-off-by: Chen Jun Signed-off-by: Shi Yang --- arch/arm64/kernel/watchdog_sdei.c | 2 +- include/linux/nmi.h | 2 ++ init/main.c | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 7fd8c2d3dd1b..549064480db0 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -21,7 +21,7 @@ #define SDEI_NMI_WATCHDOG_HWIRQ 29 static int sdei_watchdog_event_num; -static bool disable_sdei_nmi_watchdog; +bool disable_sdei_nmi_watchdog; static bool sdei_watchdog_registered; static DEFINE_PER_CPU(ktime_t, last_check_time); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index cc32f888314e..2bb1adaccb49 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -246,8 +246,10 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_SDEI_WATCHDOG void sdei_watchdog_clear_eoi(void); +extern bool disable_sdei_nmi_watchdog; #else static inline void sdei_watchdog_clear_eoi(void) { } +#define disable_sdei_nmi_watchdog 1 #endif #endif diff --git a/init/main.c b/init/main.c index 2b466bd04110..2c652633f154 100644 --- a/init/main.c +++ b/init/main.c @@ -1544,6 +1544,8 @@ static noinline void __init kernel_init_freeable(void) rcu_init_tasks_generic(); do_pre_smp_initcalls(); + if (disable_sdei_nmi_watchdog) + lockup_detector_init(); smp_init(); sched_init_smp(); @@ -1555,7 +1557,9 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); - lockup_detector_init(); + /* sdei_watchdog needs to be initialized after sdei_init */ + if (!disable_sdei_nmi_watchdog) + lockup_detector_init(); kunit_run_all_tests(); -- Gitee From 28a20f806393a380870f682c4d6fa1cede890e5f Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 1 Feb 2024 15:51:37 +0800 Subject: [PATCH 21/26] watchdog: Support watchdog_sdei coexist with existing watchdogs kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90N2C CVE: NA ---------------------------------------------------------------------- Currently we cannot use watchdog_{perf, buddy} if CONFIG_SDEI_WATCHDOG=y. Not all platforms have watchdog_sdei, so this patch tries to make watchdog_sdei coexist with other watchdogs. Only one watchdog will finally work. By default watchdog_sdei will be used. If booted with "disable_sdei_nmi_watchdog", other watchdogs will be used if probed.
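The selection logic this patch introduces can be condensed as follows (a sketch of the watchdog_{enable,disable}() hunks in the diff below, not literal code):

| if (disable_sdei_nmi_watchdog)
| 	watchdog_nmi_enable(cpu);	/* perf/buddy based watchdog */
| else
| 	sdei_watchdog_nmi_enable(cpu);	/* SDEI NMI watchdog (the default) */

Note that when CONFIG_SDEI_WATCHDOG is not set, disable_sdei_nmi_watchdog is defined as 1 (see the previous patch), so the non-SDEI path is always taken there.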
Signed-off-by: Yicong Yang Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/kernel/watchdog_sdei.c | 12 ++++++------ include/linux/nmi.h | 6 ++++++ kernel/watchdog.c | 13 ++++++++++--- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 549064480db0..9285dd85f84e 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -25,7 +25,7 @@ bool disable_sdei_nmi_watchdog; static bool sdei_watchdog_registered; static DEFINE_PER_CPU(ktime_t, last_check_time); -int watchdog_sdei_enable(unsigned int cpu) +int sdei_watchdog_nmi_enable(unsigned int cpu) { int ret; @@ -49,7 +49,7 @@ int watchdog_sdei_enable(unsigned int cpu) return 0; } -void watchdog_sdei_disable(unsigned int cpu) +void sdei_watchdog_nmi_disable(unsigned int cpu) { int ret; @@ -110,7 +110,7 @@ void sdei_watchdog_clear_eoi(void) sdei_api_clear_eoi(SDEI_NMI_WATCHDOG_HWIRQ); } -int __init watchdog_sdei_probe(void) +int __init sdei_watchdog_nmi_probe(void) { int ret; @@ -154,9 +154,9 @@ int __init watchdog_sdei_probe(void) static struct watchdog_operations arch_watchdog_ops = { .watchdog_nmi_stop = &watchdog_nmi_stop, .watchdog_nmi_start = &watchdog_nmi_start, - .watchdog_nmi_probe = &watchdog_sdei_probe, - .watchdog_nmi_enable = &watchdog_sdei_enable, - .watchdog_nmi_disable = &watchdog_sdei_disable, + .watchdog_nmi_probe = &sdei_watchdog_nmi_probe, + .watchdog_nmi_enable = &sdei_watchdog_nmi_enable, + .watchdog_nmi_disable = &sdei_watchdog_nmi_disable, }; void watchdog_ops_init(void) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2bb1adaccb49..678c95a620b5 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -245,10 +245,16 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #endif #ifdef CONFIG_SDEI_WATCHDOG +int sdei_watchdog_nmi_enable(unsigned int cpu); +void sdei_watchdog_nmi_disable(unsigned int cpu); void sdei_watchdog_clear_eoi(void); +int sdei_watchdog_nmi_probe(void); extern bool disable_sdei_nmi_watchdog; #else +static inline int sdei_watchdog_nmi_enable(unsigned int cpu) { return -ENODEV; } +static inline void sdei_watchdog_nmi_disable(unsigned int cpu) { } static inline void sdei_watchdog_clear_eoi(void) { } +static inline int sdei_watchdog_nmi_probe(void) { return -ENODEV; } #define disable_sdei_nmi_watchdog 1 #endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f924ab981a57..0b5a59497d95 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -487,8 +487,12 @@ static void watchdog_enable(unsigned int cpu) /* Initialize timestamp */ update_touch_ts(); /* Enable the perf event */ - if (watchdog_enabled & NMI_WATCHDOG_ENABLED) - nmi_watchdog_ops.watchdog_nmi_enable(cpu); + if (watchdog_enabled & NMI_WATCHDOG_ENABLED) { + if (disable_sdei_nmi_watchdog) + watchdog_nmi_enable(cpu); + else + sdei_watchdog_nmi_enable(cpu); + } } static void watchdog_disable(unsigned int cpu) @@ -502,7 +506,10 @@ static void watchdog_disable(unsigned int cpu) * between disabling the timer and disabling the perf event causes * the perf NMI to detect a false positive. 
*/ - nmi_watchdog_ops.watchdog_nmi_disable(cpu); + if (disable_sdei_nmi_watchdog) + watchdog_nmi_disable(cpu); + else + sdei_watchdog_nmi_disable(cpu); hrtimer_cancel(hrtimer); wait_for_completion(this_cpu_ptr(&softlockup_completion)); } -- Gitee From a2dc232b337093cbc9543dfc1c680c87ea829f79 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 28 Mar 2024 11:00:55 +0800 Subject: [PATCH 22/26] watchdog: Fix call trace when failed to initialize sdei kunpeng inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9KE9N CVE: NA ---------------------------------------------------------------------- 1509d06c9c41 ("init: only move down lockup_detector_init() when sdei_watchdog is enabled") In the above commit, sdei_watchdog needs to move down lockup_detector_init (), while nmi_watchdog does not. So when sdei_watchdog fails to be initialized, nmi_watchdog should not be initialized. [ 0.706631][ T1] SDEI NMI watchdog: Disable SDEI NMI Watchdog in VM [ 0.707405][ T1] ------------[ cut here ]------------ [ 0.708020][ T1] WARNING: CPU: 0 PID: 1 at kernel/watchdog_perf.c:117 hardlockup_detector_event_create+0x24/0x108 [ 0.709230][ T1] Modules linked in: [ 0.709665][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0 #1 [ 0.710700][ T1] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 0.711625][ T1] pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 0.712547][ T1] pc : hardlockup_detector_event_create+0x24/0x108 [ 0.713316][ T1] lr : watchdog_hardlockup_probe+0x28/0xa8 [ 0.714010][ T1] sp : ffff8000831cbdc0 [ 0.714501][ T1] pmr_save: 000000e0 [ 0.714957][ T1] x29: ffff8000831cbdc0 x28: 0000000000000000 x27: 0000000000000000 [ 0.715899][ T1] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [ 0.716839][ T1] x23: 0000000000000000 x22: 0000000000000000 x21: ffff80008218fab0 [ 0.717775][ T1] x20: ffff8000821af000 x19: ffff0000c0261900 x18: 0000000000000020 [ 0.718713][ T1] x17: 00000000cb551c45 x16: ffff800082625e48 x15: ffffffffffffffff [ 0.719663][ T1] x14: 0000000000000000 x13: 205d315420202020 x12: 5b5d313336363037 [ 0.720607][ T1] x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffff800081b5f630 [ 0.721590][ T1] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 000000000005fff4 [ 0.722528][ T1] x5 : 00000000002bffa8 x4 : 0000000000000000 x3 : 0000000000000000 [ 0.723482][ T1] x2 : 0000000000000000 x1 : 0000000000000140 x0 : ffff0000c02c0000 [ 0.724426][ T1] Call trace: [ 0.724808][ T1] hardlockup_detector_event_create+0x24/0x108 [ 0.725535][ T1] watchdog_hardlockup_probe+0x28/0xa8 [ 0.726174][ T1] lockup_detector_init+0x110/0x158 [ 0.726776][ T1] kernel_init_freeable+0x208/0x288 [ 0.727387][ T1] kernel_init+0x2c/0x200 [ 0.727902][ T1] ret_from_fork+0x10/0x20 [ 0.728420][ T1] ---[ end trace 0000000000000000 ]--- Fixes: f61b11535a0b ("watchdog: Support watchdog_sdei coexist with existing watchdogs") Signed-off-by: Yicong Yang Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- kernel/watchdog.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0b5a59497d95..59f3e63fbdfa 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -874,7 +874,8 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_FLAG_TIMER)); - if (!nmi_watchdog_ops.watchdog_nmi_probe()) + if ((!disable_sdei_nmi_watchdog && !sdei_watchdog_nmi_probe()) || + (disable_sdei_nmi_watchdog && !watchdog_nmi_probe())) nmi_watchdog_available = true; else 
allow_lockup_detector_init_retry = true; -- Gitee From 4a160c37fc787b667077101330d56781d76c4ce8 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 16 May 2024 17:20:16 +0800 Subject: [PATCH 23/26] irqchip/gic-v3: Fix one race condition due to NMI withdraw kunpeng inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9QIWG CVE: NA ---------------------------------------------------------------------- The introduction of FEAT_NMI/FEAT_GICv3_NMI causes a race where we may handle a normal interrupt in an interrupt-disabled context due to the withdrawal of an NMI. The flow is as follows: [interrupt disabled] <- normal interrupt pending, for example timer interrupt <- NMI occurs, ISR_EL1.nmi = 1 do_el1_interrupt() <- NMI withdraw, ISR_EL1.nmi = 0 ISR_EL1.nmi = 0, not an NMI interrupt gic_handle_irq() __gic_handle_irq_from_irqson() irqnr = gic_read_iar() <- Oops, ack and handle a normal interrupt in interrupt disabled context! Fix this by checking the interrupt status in __gic_handle_irq_from_irqson() and ignoring the interrupt if we're in an interrupt-disabled context. Fixes: 0408b5bc4300 ("irqchip/gic-v3: Implement FEAT_GICv3_NMI support") Signed-off-by: Yicong Yang Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- drivers/irqchip/irq-gic-v3.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index c5d482b8429a..60fcf41408f8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -812,6 +812,28 @@ static void __gic_handle_irq_from_irqson(struct pt_regs *regs) bool is_nmi; u32 irqnr; + /* + * We should enter here with interrupts disabled, otherwise we may met + * a race here with FEAT_NMI/FEAT_GICv3_NMI: + * + * [interrupt disabled] + * <- normal interrupt pending, for example timer interrupt + * <- NMI occurs, ISR_EL1.nmi = 1 + * do_el1_interrupt() + * <- NMI withdraw, ISR_EL1.nmi = 0 + * ISR_EL1.nmi = 0, not an NMI interrupt + * gic_handle_irq() + * __gic_handle_irq_from_irqson() + * irqnr = gic_read_iar() <- Oops, ack and handle an normal interrupt + * in interrupt disabled context! + * + * So if we met this case here, just return from the interrupt context. + * Since the interrupt is still pending, we can handle it once the + * interrupt re-enabled and it'll not be missing. + */ + if (!interrupts_enabled(regs)) + return; + irqnr = gic_read_iar(); is_nmi = gic_rpr_is_nmi_prio(); -- Gitee From 77d79e2a366fbbd5397e9bdcc1de5f09c69e3def Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 19:20:00 +0800 Subject: [PATCH 24/26] watchdog/perf: Provide function for adjusting the event period driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IC7CQP ---------------------------------------------------------------------- Architectures using perf events for hard lockup detection need to convert watchdog_thresh to the event's period; some architectures, for example arm64, perform this conversion using the CPU's maximum frequency, which is acquired via cpufreq. However, by the time the lockup detector is initialized, the cpufreq driver may not be initialized yet, thus launching a watchdog with an inaccurate period. Provide a function hardlockup_detector_perf_adjust_period() to allow adjusting the event period. The architecture can then update to a more accurate period once cpufreq is initialized.
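For intuition, the conversion in question multiplies the threshold in seconds by the assumed CPU clock to get a cycle count; a worked example (the 2 GHz figure is illustrative):

| /*
|  * period (cycles) = watchdog_thresh (s) * max_cpu_freq (Hz)
|  *
|  * With the SAFE_MAX_CPU_FREQ fallback of 5 GHz and the default
|  * watchdog_thresh of 10 s:
|  *	period = 10 * 5000000000 = 50000000000 cycles
|  *
|  * On a part actually running at 2 GHz, those cycles take
|  *	50000000000 / 2000000000 = 25 s
|  * instead of the intended 10 s, which is the inaccuracy this new
|  * function lets the architecture correct.
|  */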
Fixes: 94946f9eaac1 ("arm64: add hw_nmi_get_sample_period for preparation of lockup detector") Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Shi Yang --- include/linux/nmi.h | 2 ++ kernel/watchdog_hld.c | 23 +++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 678c95a620b5..d2749fb3dc2d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -115,6 +115,7 @@ extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); +extern void hardlockup_detector_perf_adjust_period(int cpu, u64 period); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } @@ -126,6 +127,7 @@ static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif +static inline void hardlockup_detector_perf_adjust_period(int cpu, u64 period) { } #endif #ifdef CONFIG_CORELOCKUP_DETECTOR diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index e4f158f56eda..ba5593d7b619 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -463,6 +463,29 @@ void hardlockup_detector_perf_cleanup(void) cpumask_clear(&dead_events_mask); } +/** + * hardlockup_detector_perf_adjust_period - Adjust the event period due + * to cpu frequency change + * @cpu: The CPU whose event period will be adjusted + * @period: The target period to be set + */ +void hardlockup_detector_perf_adjust_period(int cpu, u64 period) +{ + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return; + + if (!event) + return; + + if (event->attr.sample_period == period) + return; + + if (perf_event_period(event, period)) + pr_err("failed to change period to %llu\n", period); +} + /** * hardlockup_detector_perf_stop - Globally stop watchdog events * -- Gitee From 9de848a594470fcc8e6570a64f3134a42614271d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 19:20:01 +0800 Subject: [PATCH 25/26] arm64/watchdog_hld: Add a cpufreq notifier for update watchdog thresh driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IC7CQP ---------------------------------------------------------------------- arm64 depends on the cpufreq driver to obtain the maximum CPU frequency used to convert watchdog_thresh to the perf event period. cpufreq drivers like cppc_cpufreq are initialized late, after the hard lockup detector has been initialized, so a safe fallback frequency is used instead, which will be inaccurate. Use a cpufreq notifier to adjust the event's period to a more accurate one.
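One unit detail worth noting before the diff: cpufreq reports frequencies in kHz, hence the multiplication by 1000 in the update path (a sketch of the computation with an assumed 2.6 GHz part):

| /*
|  * cpufreq_get_hw_max_freq(cpu) returns kHz, or 0 if no policy exists yet:
|  *	max_cpu_freq = 2600000 kHz * 1000UL = 2600000000 Hz
|  *	new_period   = watchdog_thresh * max_cpu_freq
|  *	             = 10 * 2600000000 = 26000000000 cycles
|  */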
Fixes: 94946f9eaac1 ("arm64: add hw_nmi_get_sample_period for preparation of lockup detector") Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin --- arch/arm64/kernel/watchdog_hld.c | 58 ++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index c4dd73dae922..caed3383115b 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -24,3 +24,61 @@ bool __init arch_perf_nmi_is_available(void) */ return arm_pmu_irq_is_nmi(); } + +static int watchdog_perf_update_period(void *data) +{ + int cpu = raw_smp_processor_id(); + u64 max_cpu_freq, new_period; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + return 0; + + new_period = watchdog_thresh * max_cpu_freq; + hardlockup_detector_perf_adjust_period(cpu, new_period); + + return 0; +} + +static int watchdog_freq_notifier_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = data; + int cpu; + + if (val != CPUFREQ_CREATE_POLICY) + return NOTIFY_DONE; + + /* + * Let each online CPU related to the policy update the period by their + * own. This will serialize with the framework on start/stop the lockup + * detector (softlockup_{start,stop}_all) and avoid potential race + * condition. Otherwise we may have below theoretical race condition: + * (core 0/1 share the same policy) + * [core 0] [core 1] + * hardlockup_detector_event_create() + * hw_nmi_get_sample_period() + * (cpufreq registered, notifier callback invoked) + * watchdog_freq_notifier_callback() + * watchdog_perf_update_period() + * (since core 1's event's not yet created, + * the period is not set) + * perf_event_create_kernel_counter() + * (event's period is SAFE_MAX_CPU_FREQ) + */ + for_each_cpu(cpu, policy->cpus) + smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false); + + return NOTIFY_DONE; +} + +static struct notifier_block watchdog_freq_notifier = { + .notifier_call = watchdog_freq_notifier_callback, +}; + +static int __init init_watchdog_freq_notifier(void) +{ + return cpufreq_register_notifier(&watchdog_freq_notifier, + CPUFREQ_POLICY_NOTIFIER); +} +core_initcall(init_watchdog_freq_notifier); -- Gitee From f858f05edd06afa7f4783e28371a708e19796569 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 5 Aug 2025 15:24:10 +0800 Subject: [PATCH 26/26] sdei_watchdog: use lockup_detector_retry_init() to init sdei watchdog hulk inclusion category: other bugzilla: https://gitee.com/openeuler/kernel/issues/I8LQCC CVE: NA ------------------------------------------------- sdei watchdog needs to be initialized after sdei_init, so commit 1509d06c9c41 ("init: only move down lockup_detector_init() when sdei_watchdog is enabled") moved lockup_detector_init() down. Commit 930d8f8dbab9 ("watchdog/perf: adapt the watchdog_perf interface for async model") now provides an API, lockup_detector_retry_init(), for anyone who needs a delayed init of the lockup detector, so use this API to delay the init of the sdei watchdog.
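The resulting boot-time ordering, pieced together from the diffs in this series (a sketch, not literal code):

| /*
|  * kernel_init_freeable()
|  *   lockup_detector_init()              // probe may fail: backend not up;
|  *                                       // allow_lockup_detector_init_retry = true
|  *   smp_init(); ...; do_basic_setup()
|  *     armv8_pmu_driver_init()           // device_initcall: retries the
|  *                                       // perf watchdog via retry_init()
|  *     sdei_watchdog_hardlockup_init()   // device_initcall: retries the
|  *                                       // SDEI watchdog via retry_init()
|  * lockup_detector_check()               // late_initcall_sync: flushes the
|  *                                       // pending work, forbids more retries
|  */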
Signed-off-by: Yang Yingliang Signed-off-by: Shi Yang --- arch/arm64/kernel/perf_event.c | 2 +- arch/arm64/kernel/watchdog_sdei.c | 9 +++++++++ init/main.c | 7 +------ kernel/watchdog.c | 10 +++++++--- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 846c4f98ea3a..98112fd6d2e8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1348,7 +1348,7 @@ static int __init armv8_pmu_driver_init(void) else ret = arm_pmu_acpi_probe(armv8_pmuv3_init); - if (!ret) + if (!ret && disable_sdei_nmi_watchdog) lockup_detector_retry_init(); return ret; diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 9285dd85f84e..9b21117c5f96 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -164,3 +164,12 @@ void watchdog_ops_init(void) if (!disable_sdei_nmi_watchdog) nmi_watchdog_ops = arch_watchdog_ops; } +static int __init sdei_watchdog_hardlockup_init(void) +{ + /* sdei_watchdog needs to be initialized after sdei_init */ + if (!disable_sdei_nmi_watchdog) + lockup_detector_retry_init(); + + return 0; +} +device_initcall(sdei_watchdog_hardlockup_init) diff --git a/init/main.c b/init/main.c index 2c652633f154..23c04e808dba 100644 --- a/init/main.c +++ b/init/main.c @@ -1544,8 +1544,7 @@ static noinline void __init kernel_init_freeable(void) rcu_init_tasks_generic(); do_pre_smp_initcalls(); - if (disable_sdei_nmi_watchdog) - lockup_detector_init(); + lockup_detector_init(); smp_init(); sched_init_smp(); @@ -1557,10 +1556,6 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); - /* sdei_watchdog needs to be initialized after sdei_init */ - if (!disable_sdei_nmi_watchdog) - lockup_detector_init(); - kunit_run_all_tests(); console_on_rootfs(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 59f3e63fbdfa..3f5c3f5688e1 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -818,7 +818,12 @@ static void __init lockup_detector_delay_init(struct work_struct *work) { int ret; - ret = watchdog_nmi_probe(); + if (disable_sdei_nmi_watchdog) { + ret = watchdog_nmi_probe(); + } else { + ret = sdei_watchdog_nmi_probe(); + } + if (ret) { pr_info("Delayed init of the lockup detector failed: %d\n", ret); pr_info("Hard watchdog permanently disabled\n"); @@ -874,8 +879,7 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_FLAG_TIMER)); - if ((!disable_sdei_nmi_watchdog && !sdei_watchdog_nmi_probe()) || - (disable_sdei_nmi_watchdog && !watchdog_nmi_probe())) + if (disable_sdei_nmi_watchdog && !watchdog_nmi_probe()) nmi_watchdog_available = true; else allow_lockup_detector_init_retry = true; -- Gitee