diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 7552dbc1cc54c76d9b6ea3b6983d9ee800453d84..92cf9645c3d116b89280d836cdeb38aae7bd808e 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -270,6 +270,12 @@ Before jumping into the kernel, the following conditions must be met: having 0b1 set for the corresponding bit for each of the auxiliary counters present. + For CPUs with Non-maskable Interrupts (FEAT_NMI): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.TALLINT must be initialised to 0b0. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 21f2ec96cc96c5707161373e150f3ad0e1963ba3..340352c772521163a52ba258d674d59468cd4655 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f1accb7355f18ba2e2f5024b498957b17333d17..7c4fed0be9f3f73ae8664c5c62d27203be373e60 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -182,11 +182,14 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ + HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI || ARM64_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API @@ -1990,6 +1993,23 @@ config ARM64_EPAN if the cpu does not implement the feature. endmenu +menu "ARMv8.8 architectural features" + +config ARM64_NMI + bool "Enable support for Non-maskable Interrupts (NMI)" + default y + help + Non-maskable interrupts are an architecture and GIC feature + which allow the system to configure some interrupts to be + configured to have superpriority, allowing them to be handled + before other interrupts and masked for shorter periods of time. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. It will also be + disabled if pseudo NMIs are enabled at runtime. 
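As an illustrative aside, a driver gives an interrupt superpriority by requesting it through the kernel's NMI IRQ API rather than plain request_irq(); those calls should end up in the irqchip's irq_nmi_setup() callback (gic_irq_nmi_setup() later in this series, which programs GICD_INMIR when FEAT_GICv3_NMI is in use). A minimal sketch, with hypothetical names (my_nmi_handler, my_setup_nmi, my_irq, my_percpu_dev) that are not taken from this series:

#include <linux/interrupt.h>
#include <linux/irq.h>

static irqreturn_t my_nmi_handler(int irq, void *dev_id)
{
	/* Runs in NMI context: keep it short and only touch NMI-safe state. */
	return IRQ_HANDLED;
}

static int my_setup_nmi(unsigned int my_irq, void __percpu *my_percpu_dev)
{
	int err;

	/*
	 * Register the handler as an NMI; the irqchip must advertise
	 * IRQCHIP_SUPPORTS_NMI for this to succeed.
	 */
	err = request_percpu_nmi(my_irq, my_nmi_handler, "my-nmi", my_percpu_dev);
	if (err)
		return err;

	/* Per-CPU arming, normally run on each CPU with preemption disabled. */
	err = prepare_percpu_nmi(my_irq);
	if (!err)
		enable_percpu_nmi(my_irq, IRQ_TYPE_NONE);

	return err;
}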
+ +endmenu # "ARMv8.8 architectural features" + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index f9d2080e7399396e54adb48ec8797480f3268f78..39d737505f42cbb3aff6ffa63657b4d597d55280 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -548,6 +548,12 @@ CONFIG_ARM64_TWED=y CONFIG_ARM64_EPAN=y # end of ARMv8.7 architectural features +# +# ARMv8.8 architectural features +# +CONFIG_ARM64_NMI=y +# end of ARMv8.8 architectural features + CONFIG_ARM64_SVE=y CONFIG_ARM64_SME=y CONFIG_ARM64_MODULE_PLTS=y @@ -7311,7 +7317,7 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 -CONFIG_HARDLOCKUP_DETECTOR_PERF=y +# CONFIG_HARDLOCKUP_DETECTOR_PERF is not set # # ARM64 NMI watchdog configuration @@ -7324,6 +7330,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_CORELOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=1 +CONFIG_SDEI_WATCHDOG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 63db8e5ec9f8a915a7cbddfddaba6b4e08049d48..7dc2ab36610ffb109921f4f8d7af07f2ef6ebb2c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -37,12 +37,30 @@ msr daifset, #0xf .endm + .macro disable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_SET, xzr +alternative_else_nop_endif +#endif + .endm + + .macro enable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_CLR, xzr +alternative_else_nop_endif +#endif + .endm + .macro disable_daif + disable_allint msr daifset, #0xf .endm .macro enable_daif msr daifclr, #0xf + enable_allint .endm .macro restore_daif, flags:req diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 0da53f0c4fbf645576af228432e216b40736f03d..a787035d6affda5d9fb530de677eec6b6f7d757c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -86,5 +86,7 @@ #define ARM64_WORKAROUND_PHYTIUM_FT3386 78 #define ARM64_NCAPS 80 +#define ARM64_HAS_NMI 81 +#define ARM64_USES_NMI 82 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index dd7d18cfbd1e4dae89e2e8131979718d4b4abff4..b21f13aee50d2874359d8871e17f81d230dc491b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -770,6 +770,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static __always_inline bool system_uses_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_NMI) && + cpus_have_const_cap(ARM64_USES_NMI); +} + static inline bool system_supports_mte(void) { return IS_ENABLED(CONFIG_ARM64_MTE) && diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index cfdde3a56805959b8c074455ef7eff783a1ad2d2..f1c3d2f204f85aa19ab9c1b5f7cfbcad71ba2765 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define DAIF_PROCCTX 0 @@ -35,6 +36,9 @@ static inline void local_daif_mask(void) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + if 
(system_uses_nmi()) + _allint_set(); + trace_hardirqs_off(); } @@ -116,6 +120,14 @@ static inline void local_daif_restore(unsigned long flags) write_sysreg(flags, daif); + /* If we can take asynchronous errors we can take NMIs */ + if (system_uses_nmi()) { + if (flags & PSR_A_BIT) + _allint_set(); + else + _allint_clear(); + } + if (irq_disabled) trace_hardirqs_off(); } @@ -141,4 +153,5 @@ static inline void local_daif_inherit(struct pt_regs *regs) */ write_sysreg(flags, daif); } + #endif diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75721e93d1df93bf43cf55aae6f492..14b2fad48fc9ca3f6c457dd2a3eb1b77ada1e8f3 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -15,12 +15,20 @@ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * With the addition of the FEAT_NMI extension we gain an additional + * class of superpriority IRQ/FIQ which is separately masked with a + * choice of modes controlled by SCTLR_ELn.{SPINTMASK,NMI}. Linux + * sets SPINTMASK to 0 and NMI to 1 which results in ALLINT.ALLINT + * masking both superpriority interrupts and IRQ/FIQ regardless of the + * I and F settings. Since these superpriority interrupts are being + * used as NMIs we do not include them in the interrupt masking here, + * anything that requires that NMIs be masked needs to explicitly do + * so. */ /* diff --git a/arch/arm64/include/asm/nmi.h b/arch/arm64/include/asm/nmi.h index 4cd14b6af88b9658c7552a73618fb5a99f87d93e..c51b6cb25221a30594bc91e2e55b843a91244a54 100644 --- a/arch/arm64/include/asm/nmi.h +++ b/arch/arm64/include/asm/nmi.h @@ -1,4 +1,7 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ #ifndef __ASM_NMI_H #define __ASM_NMI_H @@ -14,4 +17,15 @@ void dynamic_ipi_setup(int cpu); void dynamic_ipi_teardown(int cpu); #endif /* !__ASSEMBLER__ */ + +static __always_inline void _allint_clear(void) +{ + asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr")); +} + +static __always_inline void _allint_set(void) +{ + asm volatile(__msr_s(SYS_ALLINT_SET, "xzr")); +} + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9705f7abd42871475d95408396aee49b8c9f34b2..917d4ca495b1f0474e1741f8616ac233be0183ba 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -129,6 +129,8 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. 
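As the irqflags.h comment above notes, anything that requires architected NMIs to be masked must do so explicitly. A minimal sketch, assuming the _allint_set()/_allint_clear() helpers and system_uses_nmi() added above (the function name is hypothetical):

static void do_work_with_nmis_masked(void)
{
	if (system_uses_nmi())
		_allint_set();		/* PSTATE.ALLINT = 1: superpriority interrupts masked */

	/* ... work that must not be interrupted by an NMI ... */

	if (system_uses_nmi())
		_allint_clear();	/* PSTATE.ALLINT = 0: NMIs can be taken again */
}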
*/ +#define SYS_ALLINT_CLR sys_reg(0, 1, 4, 0, 0) +#define SYS_ALLINT_SET sys_reg(0, 1, 4, 1, 0) #define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) @@ -338,7 +340,10 @@ #define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) #define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) +#define SYS_ALLINT sys_reg(3, 0, 4, 3, 0) + #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define SYS_ICC_NMIAR1_EL1 sys_reg(3, 0, 12, 9, 5) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) #define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) @@ -361,6 +366,10 @@ #define SYS_PAR_EL1_F BIT(0) #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define HCRX_EL2_TALLINT_MASK GENMASK(6, 6) +#define ALLINT_ALLINT BIT(13) +#define ISR_EL1_IS BIT(10) + /*** Statistical Profiling Extension ***/ #define SMPRI_EL1_PRIORITY_MASK 0xf @@ -1196,6 +1205,8 @@ #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_SPINTMASK (BIT(62)) +#define SCTLR_EL1_NMI (BIT(61)) #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -1341,6 +1352,7 @@ #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_NMI_SHIFT 36 #define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 @@ -1354,6 +1366,9 @@ #define ID_AA64PFR1_BT_BTI 0x1 #define ID_AA64PFR1_SME 1 +#define ID_AA64PFR1_NMI_IMP_DEF 0x1 +#define ID_AA64PFR1_NMI_IMP_NI 0x0 + #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1b635613df55274ebd7aa4d89bfdabdac297595d..a5fb42a6c8d7a914b0df5d5b82da69cfb6539316 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9cb9a209b63ae401c30d92a027bc5550e1207e91..a845a76f8c6b4c957db8f1d4ff7a3f134b5b54bd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include @@ -245,6 +246,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_NMI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), @@ -2054,9 +2056,11 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ -#ifdef CONFIG_ARM64_PSEUDO_NMI +#if IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) || IS_ENABLED(CONFIG_ARM64_NMI) bool enable_pseudo_nmi; +#endif +#ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) { return strtobool(p, &enable_pseudo_nmi); @@ -2070,6 +2074,41 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, } #endif +#ifdef 
CONFIG_ARM64_NMI +static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* + * Having both real and pseudo NMIs enabled simultaneously is + * likely to cause confusion. Since pseudo NMIs must be + * enabled with an explicit command line option, if the user + * has set that option on a system with real NMIs for some + * reason assume they know what they're doing. + */ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) { + pr_info("Pseudo NMI enabled, not using architected NMI\n"); + return false; + } + + return true; +} + +static void nmi_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Enable use of NMIs controlled by ALLINT, SPINTMASK should + * be clear by default but make it explicit that we are using + * this mode. Ensure that ALLINT is clear first in order to + * avoid leaving things masked. + */ + _allint_clear(); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + isb(); +} +#endif + #ifdef CONFIG_ARM64_BTI static void bti_enable(const struct arm64_cpu_capabilities *__unused) { @@ -2736,6 +2775,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = fa64_kernel_enable, }, #endif /* CONFIG_ARM64_SME */ +#ifdef CONFIG_ARM64_NMI + { + .desc = "Non-maskable Interrupts present", + .capability = ARM64_HAS_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = has_cpuid_feature, + }, + { + .desc = "Non-maskable Interrupts enabled", + .capability = ARM64_USES_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = use_nmi, + .cpu_enable = nmi_enable, + }, +#endif + #ifdef CONFIG_FAST_SYSCALL { .desc = "Xcall Support", diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4704d00d38131f01a38bfe392a203c389b86452d..eb25a4250e54d50a9879e51d6f17feeb921c0258 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -557,11 +557,24 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE + mrs x2, id_aa64pfr1_el1 + ubfx x2, x2, #ID_AA64PFR1_NMI_SHIFT, #4 + cbz x2, .Lskip_nmi +.Linit_nmi: + mrs x2, id_aa64mmfr1_el1 + ubfx x2, x2, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x2, .Lskip_nmi + + mrs_s x1, SYS_HCRX_EL2 + bic x1, x1, #HCRX_EL2_TALLINT_MASK // Don't trap ALLINT + msr_s SYS_HCRX_EL2, x1 + /* * Check for VHE being present. For the rest of the EL2 setup, * x2 being non-zero indicates that we do have VHE, and that the * kernel is intended to run at EL2. 
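As a rough illustration (not code from this series) of the check behind both the ARM64_HAS_NMI capability and the EL2 init code above: FEAT_NMI is advertised when ID_AA64PFR1_EL1.NMI (bits [39:36]) is at least 0b0001. The helper name below is hypothetical:

static bool __maybe_unused this_cpu_advertises_nmi(void)
{
	u64 pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);

	return cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_NMI_SHIFT) >=
	       ID_AA64PFR1_NMI_IMP_DEF;
}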
*/ +.Lskip_nmi: mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cf0750d1d28a83a6db8030be87103c0a56baced1..98112fd6d2e8b9e52034d8522dedc4ae19e1a672 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -1340,10 +1341,17 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { + int ret; + if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); + ret = platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_init); + ret = arm_pmu_acpi_probe(armv8_pmuv3_init); + + if (!ret && disable_sdei_nmi_watchdog) + lockup_detector_retry_init(); + + return ret; } device_initcall(armv8_pmu_driver_init) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 14300c9e06d55ec2e4c8da3e6c1eac9d515cd0cb..cf6ed56593a1c35b229e436ca4cd0ab78d80b680 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -737,6 +737,15 @@ core_initcall(tagged_addr_init); asmlinkage void __sched arm64_preempt_schedule_irq(void) { + /* + * Architected NMIs are unmasked prior to handling regular + * IRQs and masked while handling FIQs. If ALLINT is set then + * we are in a NMI or other preempting context so skip + * preemption. + */ + if (system_uses_nmi() && (read_sysreg_s(SYS_ALLINT) & ALLINT_ALLINT)) + return; + lockdep_assert_irqs_disabled(); /* diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c new file mode 100644 index 0000000000000000000000000000000000000000..caed3383115b389ec29cabab8b821573745caaa2 --- /dev/null +++ b/arch/arm64/kernel/watchdog_hld.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * cpufreq driver. Although, architecture doesn't put any restrictions on + * maximum frequency but 5 GHz seems to be safe maximum given the available + * Arm CPUs in the market which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which are running slower (eg. 1GHz on + * Developerbox) and doesn't possess a cpufreq driver. + */ +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz + +bool __init arch_perf_nmi_is_available(void) +{ + /* + * hardlockup_detector_perf_init() will success even if Pseudo-NMI or + * Hardware NMI turns off. However, the pmu interrupts will act like + * a normal interrupt instead of NMI and the hardlockup detector would + * be broken. + */ + return arm_pmu_irq_is_nmi(); +} + +static int watchdog_perf_update_period(void *data) +{ + int cpu = raw_smp_processor_id(); + u64 max_cpu_freq, new_period; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + return 0; + + new_period = watchdog_thresh * max_cpu_freq; + hardlockup_detector_perf_adjust_period(cpu, new_period); + + return 0; +} + +static int watchdog_freq_notifier_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = data; + int cpu; + + if (val != CPUFREQ_CREATE_POLICY) + return NOTIFY_DONE; + + /* + * Let each online CPU related to the policy update the period by their + * own. 
This will serialize with the framework on start/stop the lockup + * detector (softlockup_{start,stop}_all) and avoid potential race + * condition. Otherwise we may have below theoretical race condition: + * (core 0/1 share the same policy) + * [core 0] [core 1] + * hardlockup_detector_event_create() + * hw_nmi_get_sample_period() + * (cpufreq registered, notifier callback invoked) + * watchdog_freq_notifier_callback() + * watchdog_perf_update_period() + * (since core 1's event's not yet created, + * the period is not set) + * perf_event_create_kernel_counter() + * (event's period is SAFE_MAX_CPU_FREQ) + */ + for_each_cpu(cpu, policy->cpus) + smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false); + + return NOTIFY_DONE; +} + +static struct notifier_block watchdog_freq_notifier = { + .notifier_call = watchdog_freq_notifier_callback, +}; + +static int __init init_watchdog_freq_notifier(void) +{ + return cpufreq_register_notifier(&watchdog_freq_notifier, + CPUFREQ_POLICY_NOTIFIER); +} +core_initcall(init_watchdog_freq_notifier); diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index 7fd8c2d3dd1bcc4a31f470b91776fa0031cae0e1..9b21117c5f961a928392cc225b0aff6628273a27 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -21,11 +21,11 @@ #define SDEI_NMI_WATCHDOG_HWIRQ 29 static int sdei_watchdog_event_num; -static bool disable_sdei_nmi_watchdog; +bool disable_sdei_nmi_watchdog; static bool sdei_watchdog_registered; static DEFINE_PER_CPU(ktime_t, last_check_time); -int watchdog_sdei_enable(unsigned int cpu) +int sdei_watchdog_nmi_enable(unsigned int cpu) { int ret; @@ -49,7 +49,7 @@ int watchdog_sdei_enable(unsigned int cpu) return 0; } -void watchdog_sdei_disable(unsigned int cpu) +void sdei_watchdog_nmi_disable(unsigned int cpu) { int ret; @@ -110,7 +110,7 @@ void sdei_watchdog_clear_eoi(void) sdei_api_clear_eoi(SDEI_NMI_WATCHDOG_HWIRQ); } -int __init watchdog_sdei_probe(void) +int __init sdei_watchdog_nmi_probe(void) { int ret; @@ -154,9 +154,9 @@ int __init watchdog_sdei_probe(void) static struct watchdog_operations arch_watchdog_ops = { .watchdog_nmi_stop = &watchdog_nmi_stop, .watchdog_nmi_start = &watchdog_nmi_start, - .watchdog_nmi_probe = &watchdog_sdei_probe, - .watchdog_nmi_enable = &watchdog_sdei_enable, - .watchdog_nmi_disable = &watchdog_sdei_disable, + .watchdog_nmi_probe = &sdei_watchdog_nmi_probe, + .watchdog_nmi_enable = &sdei_watchdog_nmi_enable, + .watchdog_nmi_disable = &sdei_watchdog_nmi_disable, }; void watchdog_ops_init(void) @@ -164,3 +164,12 @@ void watchdog_ops_init(void) if (!disable_sdei_nmi_watchdog) nmi_watchdog_ops = arch_watchdog_ops; } +static int __init sdei_watchdog_hardlockup_init(void) +{ + /* sdei_watchdog needs to be initialized after sdei_init */ + if (!disable_sdei_nmi_watchdog) + lockup_detector_retry_init(); + + return 0; +} +device_initcall(sdei_watchdog_hardlockup_init) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 39a854ec93cac5016088f3e6905ff2580136b633..60fcf41408f8b614f046b0f1f0824ef9dff2e88e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -61,6 +61,7 @@ struct gic_chip_data { u32 nr_redist_regions; u64 flags; bool has_rss; + bool has_nmi; unsigned int ppi_nr; struct partition_desc **ppi_descs; }; @@ -142,6 +143,20 @@ static DEFINE_PER_CPU(bool, has_rss); /* Our default, arbitrary priority value. Linux only uses one anyway. 
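To make the period rescaling in watchdog_hld.c above concrete (assumed figures): with the default watchdog_thresh of 10 seconds, a platform whose cpufreq driver has not yet registered starts from the SAFE_MAX_CPU_FREQ fallback (via hw_nmi_get_sample_period(), referenced in the race comment above), giving a perf event period of 10 * 5,000,000,000 = 50,000,000,000 cycles. On a part whose real maximum clock is 2 GHz that counter would only overflow after roughly 25 seconds; once cpufreq registers, watchdog_perf_update_period() recomputes the period as 10 * 2,000,000,000 = 20,000,000,000 cycles, bringing the hard-lockup detection window back to the intended 10 seconds.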
*/ #define DEFAULT_PMR_VALUE 0xf0 +#ifdef CONFIG_ARM64 +#include + +static inline bool has_v3_3_nmi(void) +{ + return gic_data.has_nmi && system_uses_nmi(); +} +#else +static inline bool has_v3_3_nmi(void) +{ + return false; +} +#endif + phys_addr_t get_gicr_paddr(int cpu) { return (per_cpu_ptr(gic_data.rdists.rdist, cpu))->phys_base; @@ -334,6 +349,42 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask); } +static DEFINE_RAW_SPINLOCK(irq_controller_lock); + +static void gic_irq_configure_nmi(struct irq_data *d, bool enable) +{ + void __iomem *base, *addr; + u32 offset, index, mask, val; + + offset = convert_offset_index(d, GICD_INMIR, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else + base = gic_data.dist_base; + + addr = base + offset + (index / 32) * 4; + + raw_spin_lock(&irq_controller_lock); + + val = readl_relaxed(addr); + val = enable ? (val | mask) : (val & ~mask); + writel_relaxed(val, addr); + + raw_spin_unlock(&irq_controller_lock); +} + +static void gic_irq_enable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, true); +} + +static void gic_irq_disable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, false); +} + static void gic_poke_irq(struct irq_data *d, u32 offset) { void (*rwp_wait)(void); @@ -380,6 +431,12 @@ static void gic_unmask_irq(struct irq_data *d) gic_poke_irq(d, GICD_ISENABLER); } +static inline bool gic_supports_pseudo_nmis(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + static_branch_likely(&supports_pseudo_nmis); +} + static int gic_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { @@ -462,7 +519,7 @@ static int gic_irq_nmi_setup(struct irq_data *d) struct irq_desc *desc = irq_to_desc(d->irq); u32 idx; - if (!gic_supports_nmi()) + if (!gic_supports_pseudo_nmis() && !has_v3_3_nmi()) return -EINVAL; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -496,7 +553,10 @@ static int gic_irq_nmi_setup(struct irq_data *d) break; } - gic_irq_set_prio(d, GICD_INT_NMI_PRI); + if (has_v3_3_nmi()) + gic_irq_enable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_NMI_PRI); return 0; } @@ -506,7 +566,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d) struct irq_desc *desc = irq_to_desc(d->irq); u32 idx; - if (WARN_ON(!gic_supports_nmi())) + if (WARN_ON(!gic_supports_pseudo_nmis() && !has_v3_3_nmi())) return; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -538,12 +598,16 @@ static void gic_irq_nmi_teardown(struct irq_data *d) break; } - gic_irq_set_prio(d, GICD_INT_DEF_PRI); + if (has_v3_3_nmi()) + gic_irq_disable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_DEF_PRI); } static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + isb(); } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -627,35 +691,44 @@ static void gic_deactivate_unhandled(u32 irqnr) if (irqnr < 8192) gic_write_dir(irqnr); } else { - gic_write_eoir(irqnr); + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); } } -static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. 
+ * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. + * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Deactivate the interrupt when EOI mode 1 is in use. + */ +static inline void gic_complete_ack(u32 irqnr) { - bool irqs_enabled = interrupts_enabled(regs); - int err; - - if (irqs_enabled) - nmi_enter(); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + write_gicreg(irqnr, ICC_EOIR1_EL1); - /* - * Leave the PSR.I bit set to prevent other NMIs to be - * received while handling this one. - * PSR.I will be restored when we ERET to the - * interrupted context. - */ - err = handle_domain_nmi(gic_data.domain, irqnr, regs); - if (err) - gic_deactivate_unhandled(irqnr); + isb(); +} - if (irqs_enabled) - nmi_exit(); +static bool gic_rpr_is_nmi_prio(void) +{ + if (!gic_supports_pseudo_nmis()) + return false; + + return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); } static u32 do_read_iar(struct pt_regs *regs) @@ -692,20 +765,83 @@ static u32 do_read_iar(struct pt_regs *regs) return iar; } -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +static bool gic_irqnr_is_special(u32 irqnr) { - u32 irqnr; + return irqnr >= 1020 && irqnr <= 1023; +} - irqnr = do_read_iar(regs); +static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) + return; - /* Check for special IDs first */ - if ((irqnr >= 1020 && irqnr <= 1023)) + gic_complete_ack(irqnr); + + if (handle_domain_irq(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); + } +} + +static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) return; - if (gic_supports_nmi() && - unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) { - gic_handle_nmi(irqnr, regs); + gic_complete_ack(irqnr); + + if (handle_domain_nmi(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr); + gic_deactivate_unhandled(irqnr); + } +} + +/* + * An exception has been taken from a context with IRQs enabled, and this could + * be an IRQ or an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear + * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning, + * after handling any NMI but before handling any IRQ. + * + * The entry code has performed IRQ entry, and if an NMI is detected we must + * perform NMI entry/exit around invoking the handler. 
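A worked example of the GICD_INMIR indexing performed by gic_irq_configure_nmi() above, with illustrative INTIDs: for SPI 42, convert_offset_index() returns GICD_INMIR with index 42, so the superpriority bit is bit 42 % 32 = 10 of the distributor register at 0x0F80 + (42 / 32) * 4 = 0x0F84; for a PPI such as INTID 23, gic_irq_in_rdist() is true and the same calculation lands on bit 23 of GICR_INMIR0 in the redistributor's SGI frame instead.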
+ */ +static void __gic_handle_irq_from_irqson(struct pt_regs *regs) +{ + bool is_nmi; + u32 irqnr; + + /* + * We should enter here with interrupts disabled, otherwise we may met + * a race here with FEAT_NMI/FEAT_GICv3_NMI: + * + * [interrupt disabled] + * <- normal interrupt pending, for example timer interrupt + * <- NMI occurs, ISR_EL1.nmi = 1 + * do_el1_interrupt() + * <- NMI withdraw, ISR_EL1.nmi = 0 + * ISR_EL1.nmi = 0, not an NMI interrupt + * gic_handle_irq() + * __gic_handle_irq_from_irqson() + * irqnr = gic_read_iar() <- Oops, ack and handle an normal interrupt + * in interrupt disabled context! + * + * So if we met this case here, just return from the interrupt context. + * Since the interrupt is still pending, we can handle it once the + * interrupt re-enabled and it'll not be missing. + */ + if (!interrupts_enabled(regs)) return; + + irqnr = gic_read_iar(); + + is_nmi = gic_rpr_is_nmi_prio(); + + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); } if (gic_prio_masking_enabled()) { @@ -713,15 +849,82 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} - if (handle_domain_irq(gic_data.domain, irqnr, regs)) { - WARN_ONCE(true, "Unexpected interrupt received!\n"); - gic_deactivate_unhandled(irqnr); +/* + * An exception has been taken from a context with IRQs disabled, which can only + * be an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave + * DAIF.IF (and ICC_PMR_EL1) unchanged. + * + * The entry code has performed NMI entry. + */ +static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) +{ + u64 pmr; + u32 irqnr; + + /* + * We were in a context with IRQs disabled. However, the + * entry code has set PMR to a value that allows any + * interrupt to be acknowledged, and not just NMIs. This can + * lead to surprising effects if the NMI has been retired in + * the meantime, and that there is an IRQ pending. The IRQ + * would then be taken in NMI context, something that nobody + * wants to debug twice. + * + * Until we sort this, drop PMR again to a level that will + * actually only allow NMIs before reading IAR, and then + * restore it to what it was. + */ + pmr = gic_read_pmr(); + gic_pmr_mask_irqs(); + isb(); + irqnr = gic_read_iar(); + gic_write_pmr(pmr); + + __gic_handle_nmi(irqnr, regs); +} + +#ifdef CONFIG_ARM64 +static inline u64 gic_read_nmiar(void) +{ + u64 irqstat; + + irqstat = read_sysreg_s(SYS_ICC_NMIAR1_EL1); + + dsb(sy); + + return irqstat; +} + +static __always_inline void __el1_nmi(struct pt_regs *regs) +{ + u32 irqnr = gic_read_nmiar(); + + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); +} +#endif + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ +#ifdef CONFIG_ARM64 + /* Is there a NMI to handle? 
*/ + if (system_uses_nmi() && (read_sysreg(isr_el1) & ISR_EL1_IS)) { + __el1_nmi(regs); + return; } +#endif + + if (unlikely(gic_supports_pseudo_nmis() && !interrupts_enabled(regs))) + __gic_handle_irq_from_irqsoff(regs); + else + __gic_handle_irq_from_irqson(regs); } #ifdef CONFIG_FAST_IRQ @@ -1135,7 +1338,7 @@ static void gic_cpu_sys_reg_init(void) /* Set priority mask register */ if (!gic_prio_masking_enabled()) { write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); - } else if (gic_supports_nmi()) { + } else if (gic_supports_pseudo_nmis()) { /* * Mismatch configuration with boot CPU, the system is likely * to die as interrupt masking will not work properly on all @@ -1917,11 +2120,16 @@ static const struct gic_quirk gic_quirks[] = { } }; +static void gic_enable_pseudo_nmis(void) +{ + static_branch_enable(&supports_pseudo_nmis); +} + static void gic_enable_nmi_support(void) { int i; - if (!gic_prio_masking_enabled()) + if (!gic_prio_masking_enabled() && !has_v3_3_nmi()) return; if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { @@ -1937,47 +2145,11 @@ static void gic_enable_nmi_support(void) refcount_set(&ppi_nmi_refs[i], 0); /* - * Linux itself doesn't use 1:N distribution, so has no need to - * set PMHE. The only reason to have it set is if EL3 requires it - * (and we can't change it). + * Initialize pseudo-NMIs only if GIC driver cannot take advantage + * of core (FEAT_NMI) and GIC (FEAT_GICv3_NMI) in HW */ - if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) - static_branch_enable(&gic_pmr_sync); - - pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", - static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed"); - - /* - * How priority values are used by the GIC depends on two things: - * the security state of the GIC (controlled by the GICD_CTRL.DS bit) - * and if Group 0 interrupts can be delivered to Linux in the non-secure - * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the - * the ICC_PMR_EL1 register and the priority that software assigns to - * interrupts: - * - * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority - * ----------------------------------------------------------- - * 1 | - | unchanged | unchanged - * ----------------------------------------------------------- - * 0 | 1 | non-secure | non-secure - * ----------------------------------------------------------- - * 0 | 0 | unchanged | non-secure - * - * where non-secure means that the value is right-shifted by one and the - * MSB bit set, to make it fit in the non-secure priority range. - * - * In the first two cases, where ICC_PMR_EL1 and the interrupt priority - * are both either modified or unchanged, we can use the same set of - * priorities. - * - * In the last case, where only the interrupt priorities are modified to - * be in the non-secure range, we use a different PMR value to mask IRQs - * and the rest of the values that we use remain unchanged. - */ - if (gic_has_group0() && !gic_dist_security_disabled()) - static_branch_enable(&gic_nonsecure_priorities); - - static_branch_enable(&supports_pseudo_nmis); + if (!has_v3_3_nmi()) + gic_enable_pseudo_nmis(); if (static_branch_likely(&supports_deactivate_key)) gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; @@ -2043,8 +2215,7 @@ static int __init gic_init_bases(void __iomem *dist_base, irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); gic_data.has_rss = !!(typer & GICD_TYPER_RSS); - pr_info("Distributor has %sRange Selector support\n", - gic_data.has_rss ? 
"" : "no "); + gic_data.has_nmi = !!(typer & GICD_TYPER_NMI); if (typer & GICD_TYPER_MBIS) { err = mbi_init(handle, gic_data.domain); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 743f52d94d92a97fe6770ffa1521d8597f67d87b..ad679eed316f3098cc6dd92d0e0f6320a209a004 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -755,6 +755,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } +bool arm_pmu_irq_is_nmi(void) +{ + return has_nmi; +} + /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b8b723cdaaf049dfc8f34097b9244e020e90a3f8..dd4db4ec89db849437fb3500f6dbf19840fbbc78 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -30,6 +30,7 @@ #define GICD_ICFGR 0x0C00 #define GICD_IGRPMODR 0x0D00 #define GICD_NSACR 0x0E00 +#define GICD_INMIR 0x0F80 #define GICD_IGROUPRnE 0x1000 #define GICD_ISENABLERnE 0x1200 #define GICD_ICENABLERnE 0x1400 @@ -39,6 +40,7 @@ #define GICD_ICACTIVERnE 0x1C00 #define GICD_IPRIORITYRnE 0x2000 #define GICD_ICFGRnE 0x3000 +#define GICD_INMIRnE 0x3B00 #define GICD_IROUTER 0x6000 #define GICD_IROUTERnE 0x8000 #define GICD_IDREGS 0xFFD0 @@ -85,6 +87,7 @@ #define GICD_TYPER_LPIS (1U << 17) #define GICD_TYPER_MBIS (1U << 16) #define GICD_TYPER_ESPI (1U << 8) +#define GICD_TYPER_NMI (1U << 9) #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) #define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) @@ -238,6 +241,7 @@ #define GICR_ICFGR0 GICD_ICFGR #define GICR_IGRPMODR0 GICD_IGRPMODR #define GICR_NSACR GICD_NSACR +#define GICR_INMIR0 GICD_INMIR #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index ae9dbedb9849875856561081de2cd817efd6ce34..d2749fb3dc2d791e57039658376f739b0223b0a9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,6 +13,7 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); bool is_hardlockup(void); @@ -36,6 +37,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } +static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ @@ -113,6 +115,7 @@ extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); +extern void hardlockup_detector_perf_adjust_period(int cpu, u64 period); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } @@ -124,6 +127,7 @@ static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif +static inline void hardlockup_detector_perf_adjust_period(int cpu, u64 period) { } #endif #ifdef CONFIG_CORELOCKUP_DETECTOR @@ -243,9 +247,17 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #endif #ifdef CONFIG_SDEI_WATCHDOG +int 
sdei_watchdog_nmi_enable(unsigned int cpu); +void sdei_watchdog_nmi_disable(unsigned int cpu); void sdei_watchdog_clear_eoi(void); +int sdei_watchdog_nmi_probe(void); +extern bool disable_sdei_nmi_watchdog; #else +static inline int sdei_watchdog_nmi_enable(unsigned int cpu) { return -ENODEV; } +static inline void sdei_watchdog_nmi_disable(unsigned int cpu) { } static inline void sdei_watchdog_clear_eoi(void) { } +static inline int sdei_watchdog_nmi_probe(void) { return -ENODEV; } +#define disable_sdei_nmi_watchdog 1 #endif #endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index c7a35d32127271d8f2e3a3b8f796f10466436e03..2b9e7f04fcc25168d71b05660e270b71c153e11a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -192,6 +192,9 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #ifdef CONFIG_CVM_HOST void arm_pmu_set_phys_irq(bool enable); #endif + +bool arm_pmu_irq_is_nmi(void); + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); diff --git a/init/main.c b/init/main.c index 2b466bd041104f56047fdb96107f55633c8470e5..23c04e808dbaadc660fa577b63b32382cbc00afb 100644 --- a/init/main.c +++ b/init/main.c @@ -1544,6 +1544,7 @@ static noinline void __init kernel_init_freeable(void) rcu_init_tasks_generic(); do_pre_smp_initcalls(); + lockup_detector_init(); smp_init(); sched_init_smp(); @@ -1555,8 +1556,6 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); - lockup_detector_init(); - kunit_run_all_tests(); console_on_rootfs(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 88be068e9922683c81cb96b00d518a9cff26939e..3f5c3f5688e16c408bc7c8c32700298926573da1 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -116,7 +116,13 @@ void __weak watchdog_nmi_disable(unsigned int cpu) hardlockup_detector_perf_disable(); } -/* Return 0, if a NMI watchdog is available. Error code otherwise */ +/* + * Watchdog-detector specific API. + * + * Return 0 when hardlockup watchdog is available, negative value otherwise. + * Note that the negative value means that a delayed probe might + * succeed later. + */ int __weak __init watchdog_nmi_probe(void) { return hardlockup_detector_perf_init(); @@ -481,8 +487,12 @@ static void watchdog_enable(unsigned int cpu) /* Initialize timestamp */ update_touch_ts(); /* Enable the perf event */ - if (watchdog_enabled & NMI_WATCHDOG_ENABLED) - nmi_watchdog_ops.watchdog_nmi_enable(cpu); + if (watchdog_enabled & NMI_WATCHDOG_ENABLED) { + if (disable_sdei_nmi_watchdog) + watchdog_nmi_enable(cpu); + else + sdei_watchdog_nmi_enable(cpu); + } } static void watchdog_disable(unsigned int cpu) @@ -496,7 +506,10 @@ static void watchdog_disable(unsigned int cpu) * between disabling the timer and disabling the perf event causes * the perf NMI to detect a false positive. 
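The lockup_detector_retry_init() plumbing declared above (and implemented in kernel/watchdog.c below) exists because lockup_detector_init() now runs before smp_init(), i.e. before the PMU or SDEI NMI source has probed, so the first watchdog_nmi_probe() can fail. A minimal sketch of a late-probing NMI source requesting the delayed probe, mirroring armv8_pmu_driver_init() earlier in this series (names marked hypothetical):

#include <linux/init.h>
#include <linux/nmi.h>

static int __init my_nmi_source_driver_init(void)
{
	int ret = my_nmi_source_probe();	/* hypothetical probe helper */

	/*
	 * The early watchdog_nmi_probe() ran before this driver and may
	 * have returned an error; now that an NMI-capable interrupt
	 * exists, ask the core to schedule the delayed probe.
	 */
	if (!ret)
		lockup_detector_retry_init();

	return ret;
}
device_initcall(my_nmi_source_driver_init);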
*/ - nmi_watchdog_ops.watchdog_nmi_disable(cpu); + if (disable_sdei_nmi_watchdog) + watchdog_nmi_disable(cpu); + else + sdei_watchdog_nmi_disable(cpu); hrtimer_cancel(hrtimer); wait_for_completion(this_cpu_ptr(&softlockup_completion)); } @@ -795,6 +808,67 @@ void __weak watchdog_ops_init(void) { } +static void __init lockup_detector_delay_init(struct work_struct *work); +static bool allow_lockup_detector_init_retry __initdata; + +static struct work_struct detector_work __initdata = + __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); + +static void __init lockup_detector_delay_init(struct work_struct *work) +{ + int ret; + + if (disable_sdei_nmi_watchdog) { + ret = watchdog_nmi_probe(); + } else { + ret = sdei_watchdog_nmi_probe(); + } + + if (ret) { + pr_info("Delayed init of the lockup detector failed: %d\n", ret); + pr_info("Hard watchdog permanently disabled\n"); + return; + } + + allow_lockup_detector_init_retry = false; + + nmi_watchdog_available = true; + lockup_detector_setup(); +} + +/* + * lockup_detector_retry_init - retry init lockup detector if possible. + * + * Retry hardlockup detector init. It is useful when it requires some + * functionality that has to be initialized later on a particular + * platform. + */ +void __init lockup_detector_retry_init(void) +{ + /* Must be called before late init calls */ + if (!allow_lockup_detector_init_retry) + return; + + schedule_work(&detector_work); +} + +/* + * Ensure that optional delayed hardlockup init is proceed before + * the init code and memory is freed. + */ +static int __init lockup_detector_check(void) +{ + /* Prevent any later retry. */ + allow_lockup_detector_init_retry = false; + + /* Make sure no work is pending. */ + flush_work(&detector_work); + + return 0; + +} +late_initcall_sync(lockup_detector_check); + void __init lockup_detector_init(void) { watchdog_ops_init(); @@ -805,8 +879,11 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_FLAG_TIMER)); - if (!nmi_watchdog_ops.watchdog_nmi_probe()) + if (disable_sdei_nmi_watchdog && !watchdog_nmi_probe()) nmi_watchdog_available = true; + else + allow_lockup_detector_init_retry = true; + lockup_detector_setup(); #ifdef CONFIG_CORELOCKUP_DETECTOR if (enable_corelockup_detector) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index e4f158f56eda97a261cf4de4a5abfd77a162e955..ba5593d7b619e7c110e0a823c434989d96aea958 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -463,6 +463,29 @@ void hardlockup_detector_perf_cleanup(void) cpumask_clear(&dead_events_mask); } +/** + * hardlockup_detector_perf_adjust_period - Adjust the event period due + * to cpu frequency change + * @cpu: The CPU whose event period will be adjusted + * @period: The target period to be set + */ +void hardlockup_detector_perf_adjust_period(int cpu, u64 period) +{ + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return; + + if (!event) + return; + + if (event->attr.sample_period == period) + return; + + if (perf_event_period(event, period)) + pr_err("failed to change period to %llu\n", period); +} + /** * hardlockup_detector_perf_stop - Globally stop watchdog events *