From f62f085651eca7f8b84cb6e958b1350971ff81dc Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 Sep 2023 11:02:07 -0700 Subject: [PATCH 01/23] cpufreq: intel_pstate: Revise global turbo disable check commit 37b6ddba967c601479bea418a7ac6ff16b6232b7 upstream. Setting global turbo flag based on CPU 0 P-state limits is problematic as it limits max P-state request on every CPU on the system just based on its P-state limits. There are two cases in which global.turbo_disabled flag is set: - When the MSR_IA32_MISC_ENABLE_TURBO_DISABLE bit is set to 1 in the MSR MSR_IA32_MISC_ENABLE. This bit can be only changed by the system BIOS before power up. - When the max non turbo P-state is same as max turbo P-state for CPU 0. The second check is not a valid to decide global turbo state based on the CPU 0. CPU 0 max turbo P-state can be same as max non turbo P-state, but for other CPUs this may not be true. There is no guarantee that max P-state limits are same for every CPU. This is possible that during fusing max P-state for a CPU is constrained. Also with the Intel Speed Select performance profile, CPU 0 may not be present in all profiles. In this case the max non turbo and turbo P-state can be set to the lowest possible P-state by the hardware when switched to such profile. Since max non turbo and turbo P-state is same, global.turbo_disabled flag will be set. Once global.turbo_disabled is set, any scaling max and min frequency update for any CPU will result in its max P-state constrained to the max non turbo P-state. Hence remove the check of max non turbo P-state equal to max turbo P-state of CPU 0 to set global turbo disabled flag. Intel-SIG: commit 37b6ddba967c cpufreq: intel_pstate: Revise global turbo disable check. Backport intel_pstate driver update for 6.6 Signed-off-by: Srinivas Pandruvada [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8a4fdf212ce0..7cb35842f68a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -595,13 +595,9 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) static inline void update_turbo_state(void) { u64 misc_en; - struct cpudata *cpu; - cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = - (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || - cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); + global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE; } static int min_perf_pct_min(void) -- Gitee From 290b9295c5e6f7afaffe87486f88b3011dc42cdf Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 20 Nov 2023 10:59:42 -0800 Subject: [PATCH 02/23] cpufreq: intel_pstate: Prioritize firmware-provided balance performance EPP commit 2719675fa8111a8d7a060133e1dd4797d20c9754 upstream. The platform firmware can provide a balance performance EPP value by enabling HWP and programming the EPP to the desired value. However, currently this only takes effect for processors listed in intel_epp_balance_perf[], so in order to enable a new processor model to utilize this mechanism, that table needs to be updated. It arguably should not be necessary to modify the kernel to work properly with every new generation of processors, though, and distributions that don't always ship the most recent kernels should be able to run reasonably well on new hardware without code changes. For this reason, move the check to avoid updating the EPP when the balance performance EPP is unmodified from the power-up default of 0x80 after the check that allows the firmware-provided balance performance EPP value to be retrieved. This will cause the code to always look for the firmware- provided value before consulting intel_epp_balance_perf[] and the handling of new hardware will not depend on whether or not that thable has been updated yet. Intel-SIG: commit 2719675fa811 cpufreq: intel_pstate: Prioritize firmware-provided balance performance EPP. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7cb35842f68a..df231d7f65a1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1715,13 +1715,6 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) { cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); - /* - * If this CPU gen doesn't call for change in balance_perf - * EPP return. - */ - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) - return; - /* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) @@ -1734,6 +1727,13 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) return; } + /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + /* * Use hard coded value per gen to update the balance_perf * and default EPP. -- Gitee From 26a9e05770cfa8f3229fd62bd6c178a59d9e6fc3 Mon Sep 17 00:00:00 2001 From: Zhenguo Yao Date: Wed, 13 Dec 2023 18:28:08 +0800 Subject: [PATCH 03/23] cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode commit e95013156ad88e6a1e1db6545881f49183e2ee0a upstream. Users may disable HWP in firmware, in which case intel_pstate will give up unless the CPU model is explicitly supported. See also the following past commits: - commit df51f287b5de ("cpufreq: intel_pstate: Add Sapphire Rapids support in no-HWP mode") - commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers support") - commit 706c5328851d ("cpufreq: intel_pstate: Add Cometlake support in no-HWP mode") - commit fbdc21e9b038 ("cpufreq: intel_pstate: Add Icelake servers support in no-HWP mode") - commit 71bb5c82aaae ("cpufreq: intel_pstate: Add Tigerlake support in no-HWP mode") Intel-SIG: commit e95013156ad8 cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Zhenguo Yao Acked-by: Srinivas Pandruvada [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index df231d7f65a1..bd621dda767b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2430,6 +2430,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -- Gitee From 0fa2700150db784f992023ad6165e758ec136881 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 13 Feb 2024 12:16:00 +0100 Subject: [PATCH 04/23] cpufreq: intel_pstate: remove cpudata::prev_cummulative_iowait commit 4615ac9010be84d676f9e893e5a7ea4b5febd1e8 upstream. Commit 09c448d3c61f ("cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm") removed the last user of cpudata::prev_cummulative_iowait. Remove the member too. Found by https://github.com/jirislaby/clang-struct. Intel-SIG: commit 4615ac9010be cpufreq: intel_pstate: remove cpudata. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bd621dda767b..f9f801e2e088 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -201,8 +201,6 @@ struct global_params { * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value - * @prev_cummulative_iowait: IO Wait time difference from last and - * current sample * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios @@ -241,7 +239,6 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; - u64 prev_cummulative_iowait; struct sample sample; int32_t min_perf_ratio; int32_t max_perf_ratio; -- Gitee From 79a159dbaa63c2cbae384eee3b8f9a44f2bec1ad Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 19 Feb 2024 18:26:06 -0800 Subject: [PATCH 05/23] cpufreq: intel_pstate: Allow model specific EPPs commit 240a8da623008eb9f4e32c7a19ce16a6605911dc upstream. The current implementation allows model specific EPP override for balanced_performance. Add feature to allow model specific EPP for all predefined EPP strings. For example for some CPU models, even changing performance EPP has benefits Use a mask of EPPs as driver_data instead of just balanced_performance. Intel-SIG: commit 240a8da62300 cpufreq: intel_pstate: Allow model specific EPPs. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 41 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f9f801e2e088..be3db0003e2e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -3400,14 +3401,29 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } -static const struct x86_cpu_id intel_epp_balance_perf[] = { +#define POWERSAVE_MASK GENMASK(7, 0) +#define BALANCE_POWER_MASK GENMASK(15, 8) +#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16) +#define PERFORMANCE_MASK GENMASK(31, 24) + +#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \ + (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\ + FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\ + FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\ + FIELD_PREP_CONST(PERFORMANCE_MASK, performance)) + +#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \ + (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\ + balance_perf, HWP_EPP_PERFORMANCE)) + +static const struct x86_cpu_id intel_epp_default[] = { /* * Set EPP value as 102, this is the max suggested EPP * which can result in one core turbo frequency for * AlderLake Mobile CPUs. */ - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), {} }; @@ -3500,10 +3516,23 @@ static int __init intel_pstate_init(void) intel_pstate_sysfs_expose_params(); if (hwp_active) { - const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default); - if (id) - epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + if (id) { + epp_values[EPP_INDEX_POWERSAVE] = + FIELD_GET(POWERSAVE_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_POWERSAVE] = + FIELD_GET(BALANCE_POWER_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = + FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data); + epp_values[EPP_INDEX_PERFORMANCE] = + FIELD_GET(PERFORMANCE_MASK, id->driver_data); + pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n", + epp_values[EPP_INDEX_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_PERFORMANCE], + epp_values[EPP_INDEX_PERFORMANCE]); + } } mutex_lock(&intel_pstate_driver_lock); -- Gitee From f6650e4c69293d54cceba87987c9793b9201a877 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:29:43 +0100 Subject: [PATCH 06/23] cpufreq: intel_pstate: Drop redundant locking from intel_pstate_driver_cleanup() commit f186b2dace86f36cc08872b693185eaf71128898 upstream. Remove the spinlock locking from intel_pstate_driver_cleanup() as it is not necessary because no other code accessing all_cpu_data[] can run in parallel with that function. Had the locking been necessary, though, it would have been incorrect because the lock in question is acquired from a hardirq handler and it cannot be acquired from thread context without disabling interrupts. Intel-SIG: commit f186b2dace86 cpufreq: intel_pstate: Drop redundant locking from. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be3db0003e2e..f49df43b0d04 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3131,10 +3131,8 @@ static void intel_pstate_driver_cleanup(void) if (intel_pstate_driver == &intel_pstate) intel_pstate_clear_update_util_hook(cpu); - raw_spin_lock(&hwp_notify_lock); kfree(all_cpu_data[cpu]); WRITE_ONCE(all_cpu_data[cpu], NULL); - raw_spin_unlock(&hwp_notify_lock); } } cpus_read_unlock(); -- Gitee From 7356a7c665eaa9b1a0cb224cab0e4553fc95eb8e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:32:02 +0100 Subject: [PATCH 07/23] cpufreq: intel_pstate: Wait for canceled delayed work to complete commit 432acb219af4edecdd11d360f30b7cc643524db8 upstream. Make intel_pstate_disable_hwp_interrupt() wait for canceled delayed work to complete to avoid leftover work items running when it returns which may be during driver unregistration and may confuse things going forward. Intel-SIG: commit 432acb219af4 cpufreq: intel_pstate: Wait for canceled delayed work to complete. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f49df43b0d04..0383adc780f3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1679,6 +1679,7 @@ void notify_hwp_interrupt(void) static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) { unsigned long flags; + bool cancel_work; if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) return; @@ -1687,9 +1688,11 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); raw_spin_lock_irqsave(&hwp_notify_lock, flags); - if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask)) - cancel_delayed_work(&cpudata->hwp_notify_work); + cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + + if (cancel_work) + cancel_delayed_work_sync(&cpudata->hwp_notify_work); } static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) -- Gitee From e358e0e9f8fb35036831021122bab24a29aead6c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2024 19:52:06 +0100 Subject: [PATCH 08/23] cpufreq: intel_pstate: Get rid of unnecessary READ_ONCE() annotations commit 0f2828e17b6f41b8b345f0031e3fe58529991748 upstream. Drop two redundant checks involving READ_ONCE() from notify_hwp_interrupt() and make it check hwp_active without READ_ONCE() which is not necessary, because that variable is only set once during the early initialization of the driver. In order to make that clear, annotate hwp_active with __ro_after_init. Intel-SIG: commit 0f2828e17b6f cpufreq: intel_pstate: Get rid of unnecessary READ_ONCE() annotations. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0383adc780f3..b9d88c5b5bac 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -292,7 +292,7 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; -static int hwp_active __read_mostly; +static bool hwp_active __ro_after_init; static int hwp_mode_bdw __read_mostly; static bool per_cpu_limits __read_mostly; static bool hwp_boost __read_mostly; @@ -1632,11 +1632,10 @@ static cpumask_t hwp_intr_enable_mask; void notify_hwp_interrupt(void) { unsigned int this_cpu = smp_processor_id(); - struct cpudata *cpudata; unsigned long flags; u64 value; - if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) return; rdmsrl_safe(MSR_HWP_STATUS, &value); @@ -1648,24 +1647,8 @@ void notify_hwp_interrupt(void) if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask)) goto ack_intr; - /* - * Currently we never free all_cpu_data. And we can't reach here - * without this allocated. But for safety for future changes, added - * check. - */ - if (unlikely(!READ_ONCE(all_cpu_data))) - goto ack_intr; - - /* - * The free is done during cleanup, when cpufreq registry is failed. - * We wouldn't be here if it fails on init or switch status. But for - * future changes, added check. - */ - cpudata = READ_ONCE(all_cpu_data[this_cpu]); - if (unlikely(!cpudata)) - goto ack_intr; - - schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10)); + schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work, + msecs_to_jiffies(10)); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); @@ -3456,7 +3439,7 @@ static int __init intel_pstate_init(void) * deal with it. */ if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) { - WRITE_ONCE(hwp_active, 1); + hwp_active = true; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; -- Gitee From 4cfe28686c36c034e992c9d55ca3edb790d0df9e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:34:06 +0100 Subject: [PATCH 09/23] cpufreq: intel_pstate: Use __ro_after_init for three variables commit e97a98238da68aea4a0be0b2cc40e39527c880b1 upstream. There are at least 3 variables in intel_pstate that do not get updated after they have been initialized, so annotate them with __ro_after_init. Intel-SIG: commit e97a98238da6 cpufreq: intel_pstate: Use __ro_after_init for three variables. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b9d88c5b5bac..d7237285f7d0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -293,10 +293,10 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; static bool hwp_active __ro_after_init; -static int hwp_mode_bdw __read_mostly; -static bool per_cpu_limits __read_mostly; +static int hwp_mode_bdw __ro_after_init; +static bool per_cpu_limits __ro_after_init; +static bool hwp_forced __ro_after_init; static bool hwp_boost __read_mostly; -static bool hwp_forced __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; -- Gitee From 6ad71edcb858bf4236c39724b1ff66b172ba372d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:01:58 +0100 Subject: [PATCH 10/23] cpufreq: intel_pstate: Fold intel_pstate_max_within_limits() into caller commit 032c5565eb80edb6f2faeb31939540c897987119 upstream. Fold intel_pstate_max_within_limits() into its only caller. No functional impact. Intel-SIG: commit 032c5565eb80 cpufreq: intel_pstate: Fold intel_pstate_max_within_limits() into caller. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d7237285f7d0..668a087a5737 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2011,14 +2011,6 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); } -static void intel_pstate_max_within_limits(struct cpudata *cpu) -{ - int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); - - update_turbo_state(); - intel_pstate_set_pstate(cpu, pstate); -} - static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu); @@ -2593,12 +2585,15 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_update_perf_limits(cpu, policy->min, policy->max); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { + int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); + /* * NOHZ_FULL CPUs need this as the governor callback may not * be invoked on them. */ intel_pstate_clear_update_util_hook(policy->cpu); - intel_pstate_max_within_limits(cpu); + update_turbo_state(); + intel_pstate_set_pstate(cpu, pstate); } else { intel_pstate_set_update_util_hook(policy->cpu); } -- Gitee From 80085f4852a30801e29da8795a02acde4e3c108c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:02:42 +0100 Subject: [PATCH 11/23] cpufreq: intel_pstate: Do not update global.turbo_disabled after initialization commit 0940f1a8011fd69be5082015068e0dc31c800c20 upstream. The global.turbo_disabled is updated quite often, especially in the passive mode in which case it is updated every time the scheduler calls into the driver. However, this is generally not necessary and it adds MSR read overhead to scheduler code paths (and that particular MSR is slow to read). For this reason, make the driver read MSR_IA32_MISC_ENABLE_TURBO_DISABLE just once at the cpufreq driver registration time and remove all of the in-flight updates of global.turbo_disabled. Intel-SIG: commit 0940f1a8011f cpufreq: intel_pstate: Do not update global.turbo_disabled. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 51 ++++++---------------------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 668a087a5737..30fd578e89c1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -173,7 +173,6 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. - * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -182,7 +181,6 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -590,12 +588,13 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } -static inline void update_turbo_state(void) +static bool turbo_is_disabled(void) { u64 misc_en; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + + return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static int min_perf_pct_min(void) @@ -1150,40 +1149,16 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); } -static void intel_pstate_update_max_freq(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - return; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); -} - static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); - update_turbo_state(); - /* - * If turbo has been turned on or off globally, policy limits for - * all CPUs need to be updated to reflect that. - */ - if (global.turbo_disabled_mf != global.turbo_disabled) { - global.turbo_disabled_mf = global.turbo_disabled; - arch_set_max_freq_ratio(global.turbo_disabled); - for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(cpu); - } else { - cpufreq_update_policy(cpu); - } + cpufreq_update_policy(cpu); mutex_unlock(&intel_pstate_driver_lock); } @@ -1283,7 +1258,6 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - update_turbo_state(); if (global.turbo_disabled) ret = sprintf(buf, "%u\n", global.turbo_disabled); else @@ -1313,7 +1287,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_lock(&intel_pstate_limits_lock); - update_turbo_state(); if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); @@ -2280,8 +2253,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) struct sample *sample; int target_pstate; - update_turbo_state(); - target_pstate = get_target_pstate(cpu); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); @@ -2592,7 +2563,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) * be invoked on them. */ intel_pstate_clear_update_util_hook(policy->cpu); - update_turbo_state(); intel_pstate_set_pstate(cpu, pstate); } else { intel_pstate_set_update_util_hook(policy->cpu); @@ -2636,7 +2606,6 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, { int max_freq; - update_turbo_state(); if (hwp_active) { intel_pstate_get_hwp_cap(cpu); max_freq = global.no_turbo || global.turbo_disabled ? @@ -2733,8 +2702,6 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_freq; - update_turbo_state(); - global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2900,8 +2867,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; - update_turbo_state(); - freqs.old = policy->cur; freqs.new = target_freq; @@ -2923,8 +2888,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - update_turbo_state(); - target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); @@ -2942,7 +2905,6 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; - update_turbo_state(); cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : HWP_HIGHEST_PERF(hwp_cap); @@ -3130,6 +3092,9 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; + global.turbo_disabled = turbo_is_disabled(); + + arch_set_max_freq_ratio(global.turbo_disabled); intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); -- Gitee From a9a67753eb102fd054407fe7f84178d3eb48ca48 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:03:25 +0100 Subject: [PATCH 12/23] cpufreq: intel_pstate: Rearrange show_no_turbo() and store_no_turbo() commit c626a438452079824139f97137f17af47b1a8989 upstream. Now that global.turbo_disabled can only change at the cpufreq driver registration time, initialize global.no_turbo at that time too so they are in sync to start with (if the former is set, the latter cannot be updated later anyway). That allows show_no_turbo() to be simlified because it does not need to check global.turbo_disabled and store_no_turbo() can be rearranged to avoid doing anything if the new value of global.no_turbo is equal to the current one and only return an error on attempts to clear global.no_turbo when global.turbo_disabled. While at it, eliminate the redundant ret variable from store_no_turbo(). No intentional functional impact. Intel-SIG: commit c626a4384520 cpufreq: intel_pstate: Rearrange show_no_turbo() and store_no_turbo(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 30fd578e89c1..755c0a13f927 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1258,10 +1258,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - if (global.turbo_disabled) - ret = sprintf(buf, "%u\n", global.turbo_disabled); - else - ret = sprintf(buf, "%u\n", global.no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1272,31 +1269,34 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; - int ret; + bool no_turbo; - ret = sscanf(buf, "%u", &input); - if (ret != 1) + if (sscanf(buf, "%u", &input) != 1) return -EINVAL; mutex_lock(&intel_pstate_driver_lock); if (!intel_pstate_driver) { - mutex_unlock(&intel_pstate_driver_lock); - return -EAGAIN; + count = -EAGAIN; + goto unlock_driver; } - mutex_lock(&intel_pstate_limits_lock); + no_turbo = !!clamp_t(int, input, 0, 1); + + if (no_turbo == global.no_turbo) + goto unlock_driver; if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); - mutex_unlock(&intel_pstate_limits_lock); - mutex_unlock(&intel_pstate_driver_lock); - return -EPERM; + count = -EPERM; + goto unlock_driver; } - global.no_turbo = clamp_t(int, input, 0, 1); + global.no_turbo = no_turbo; + + mutex_lock(&intel_pstate_limits_lock); - if (global.no_turbo) { + if (no_turbo) { struct cpudata *cpu = all_cpu_data[0]; int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate; @@ -1308,8 +1308,9 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); - arch_set_max_freq_ratio(global.no_turbo); + arch_set_max_freq_ratio(no_turbo); +unlock_driver: mutex_unlock(&intel_pstate_driver_lock); return count; @@ -3093,6 +3094,7 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; global.turbo_disabled = turbo_is_disabled(); + global.no_turbo = global.turbo_disabled; arch_set_max_freq_ratio(global.turbo_disabled); -- Gitee From 5bf1012d5454f3e2295c1054e6bb7d0017540f4e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:04:24 +0100 Subject: [PATCH 13/23] cpufreq: intel_pstate: Read global.no_turbo under READ_ONCE() commit 9558fae8ce97b3b320b387dd7c88309df2c36d4d upstream. Because global.no_turbo is generally not read under intel_pstate_driver_lock make store_no_turbo() use WRITE_ONCE() for updating it (this is the only place at which it is updated except for the initialization) and make the majority of places reading it use READ_ONCE(). Also remove redundant global.turbo_disabled checks from places that depend on the 'true' value of global.no_turbo because it can only be 'true' if global.turbo_disabled is also 'true'. Intel-SIG: commit 9558fae8ce97 cpufreq: intel_pstate: Read global.no_turbo under READ_ONCE(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 755c0a13f927..39cd3d3c53d5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1292,7 +1292,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, goto unlock_driver; } - global.no_turbo = no_turbo; + WRITE_ONCE(global.no_turbo, no_turbo); mutex_lock(&intel_pstate_limits_lock); @@ -1747,7 +1747,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (global.no_turbo && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1912,7 +1912,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (global.no_turbo && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -2210,7 +2210,7 @@ static inline int32_t get_target_pstate(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = global.no_turbo || global.turbo_disabled ? + target = READ_ONCE(global.no_turbo) ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2472,7 +2472,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static int intel_pstate_get_max_freq(struct cpudata *cpu) { - return global.turbo_disabled || global.no_turbo ? + return READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; } @@ -2609,7 +2609,7 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, if (hwp_active) { intel_pstate_get_hwp_cap(cpu); - max_freq = global.no_turbo || global.turbo_disabled ? + max_freq = READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; } else { max_freq = intel_pstate_get_max_freq(cpu); -- Gitee From 304230dce1aefd756761349e5d4996670f0aafa4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:05:06 +0100 Subject: [PATCH 14/23] cpufreq: intel_pstate: Replace three global.turbo_disabled checks commit f32587dcbe5f40e160d8de262add6abab79356a7 upstream. Replace the global.turbo_disabled in __intel_pstate_update_max_freq() with a global.no_turbo one to make store_no_turbo() actually update the maximum CPU frequency on the trubo preference changes, which needs to be consistent with arch_set_max_freq_ratio() called from there. For more consistency, replace the global.turbo_disabled checks in __intel_pstate_cpu_init() and intel_cpufreq_adjust_perf() with global.no_turbo checks either. Intel-SIG: commit f32587dcbe5f cpufreq: intel_pstate: Replace three global.turbo_disabled checks. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 39cd3d3c53d5..50d2ee25b4c8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1149,7 +1149,7 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); } @@ -2703,7 +2703,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_freq; - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->min = policy->cpuinfo.min_freq; @@ -2906,8 +2906,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; - cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : - HWP_HIGHEST_PERF(hwp_cap); + cap_pstate = READ_ONCE(global.no_turbo) ? + HWP_GUARANTEED_PERF(hwp_cap) : + HWP_HIGHEST_PERF(hwp_cap); /* Optimization: Avoid unnecessary divisions. */ -- Gitee From d9fef3b0ca69f4d567314f4a92ffa67f78fdd7cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2024 19:52:45 +0100 Subject: [PATCH 15/23] cpufreq: intel_pstate: Update the maximum CPU frequency consistently commit e8217b4bece379e66d43ab5070431712f07bf625 upstream. There are 3 places at which the maximum CPU frequency may change, store_no_turbo(), intel_pstate_update_limits() (when called by the cpufreq core) and intel_pstate_notify_work() (when handling a HWP change notification). Currently, cpuinfo.max_freq is only updated by store_no_turbo() and intel_pstate_notify_work(), although it principle it may be necessary to update it in intel_pstate_update_limits() either. Make all of them mutually consistent. Intel-SIG: commit e8217b4bece3 cpufreq: intel_pstate: Update the maximum CPU frequency consistently. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 50d2ee25b4c8..c4c8da468be3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1149,18 +1149,32 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { + intel_pstate_get_hwp_cap(cpudata); + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + refresh_frequency_limits(policy); } static void intel_pstate_update_limits(unsigned int cpu) { - mutex_lock(&intel_pstate_driver_lock); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - cpufreq_update_policy(cpu); + if (!policy) + return; - mutex_unlock(&intel_pstate_driver_lock); + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + + cpufreq_cpu_release(policy); +} + +static void intel_pstate_update_limits_for_all(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + intel_pstate_update_limits(cpu); } /************************** sysfs begin ************************/ @@ -1307,7 +1321,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + intel_pstate_update_limits_for_all(); arch_set_max_freq_ratio(no_turbo); unlock_driver: @@ -1591,7 +1605,6 @@ static void intel_pstate_notify_work(struct work_struct *work) struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); if (policy) { - intel_pstate_get_hwp_cap(cpudata); __intel_pstate_update_max_freq(cpudata, policy); cpufreq_cpu_release(policy); -- Gitee From dc60ebc26c6547fa0c1cca40ca0cdcfc238e3932 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:45 +0200 Subject: [PATCH 16/23] cpufreq: intel_pstate: hide unused intel_pstate_cpu_oob_ids[] commit 8c556541a53848d6611ff8b5f9bf52e96c56f48e upstream. The reference to this variable is hidden in an #ifdef: drivers/cpufreq/intel_pstate.c:2440:32: error: 'intel_pstate_cpu_oob_ids' defined but not used [-Werror=unused-const-variable=] Use the same check around the definition. Intel-SIG: commit 8c556541a538 cpufreq: intel_pstate: hide unused intel_pstate_cpu_oob_ids[]. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c4c8da468be3..43934afb866c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2396,6 +2396,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); +#ifdef CONFIG_ACPI static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(BROADWELL_D, core_funcs), X86_MATCH(BROADWELL_X, core_funcs), @@ -2404,6 +2405,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; +#endif static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { X86_MATCH(KABYLAKE, core_funcs), -- Gitee From af76b5a8bc709ae2aa5e74bf7ebf1289459f7ad4 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sun, 5 May 2024 12:07:12 -0700 Subject: [PATCH 17/23] cpufreq: intel_pstate: fix struct cpudata::epp_cached kernel-doc commit 0a206fe35d360a9ec1c8b1609ca394c2759a8962 upstream. make C=1 currently gives the following warning: drivers/cpufreq/intel_pstate.c:262: warning: Function parameter or struct member 'epp_cached' not described in 'cpudata' Add the missing ":" to fix the trivial kernel-doc syntax error. Intel-SIG: commit 0a206fe35d36 cpufreq: intel_pstate: fix struct cpudata::epp_cached kernel-doc. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Jeff Johnson Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 43934afb866c..58feccf3b575 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -211,7 +211,7 @@ struct global_params { * @epp_policy: Last saved policy used to set EPP/EPB * @epp_default: Power on default HWP energy performance * preference/bias - * @epp_cached Cached HWP energy-performance preference value + * @epp_cached: Cached HWP energy-performance preference value * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set -- Gitee From 215fdc98b0a824905e118944a3ec50406756f182 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 31 May 2024 16:00:04 -0700 Subject: [PATCH 18/23] cpufreq: intel_pstate: Fix unchecked HWP MSR access commit 1e24c31351787e24b7eebe84866bd55fd62a0aef upstream. Fix unchecked MSR access error for processors with no HWP support. On such processors, maximum frequency can be changed by the system firmware using ACPI event ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED. This results in accessing HWP MSR 0x771. Call Trace: generic_exec_single+0x58/0x120 smp_call_function_single+0xbf/0x110 rdmsrl_on_cpu+0x46/0x60 intel_pstate_get_hwp_cap+0x1b/0x70 intel_pstate_update_limits+0x2a/0x60 acpi_processor_notify+0xb7/0x140 acpi_ev_notify_dispatch+0x3b/0x60 HWP MSR 0x771 can be only read on a CPU which supports HWP and enabled. Hence intel_pstate_get_hwp_cap() can only be called when hwp_active is true. Intel-SIG: commit 1e24c3135178 cpufreq: intel_pstate: Fix unchecked HWP MSR access. Backport intel_pstate driver update for 6.6 from 6.11 Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-pm/20240529155740.Hq2Hw7be@linutronix.de/ Fixes: e8217b4bece3 ("cpufreq: intel_pstate: Update the maximum CPU frequency consistently") Tested-by: Sebastian Andrzej Siewior Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 58feccf3b575..b732b06bdc41 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1149,7 +1149,8 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - intel_pstate_get_hwp_cap(cpudata); + if (hwp_active) + intel_pstate_get_hwp_cap(cpudata); policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; -- Gitee From f7dabdaf92ecf80c0fd77b9a9b6219d5b7d12852 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jun 2024 16:53:06 +0200 Subject: [PATCH 19/23] cpufreq: intel_pstate: Check turbo_is_disabled() in store_no_turbo() commit 350cbb5d2f676bff22c49e5e81764c3b8da342a9 upstream. After recent changes in intel_pstate, global.turbo_disabled is only set at the initialization time and never changed. However, it turns out that on some systems the "turbo disabled" bit in MSR_IA32_MISC_ENABLE, the initial state of which is reflected by global.turbo_disabled, can be flipped later and there should be a way to take that into account (other than checking that MSR every time the driver runs which is costly and useless overhead on the vast majority of systems). For this purpose, notice that before the changes in question, store_no_turbo() contained a turbo_is_disabled() check that was used for updating global.turbo_disabled if the "turbo disabled" bit in MSR_IA32_MISC_ENABLE had been flipped and that functionality can be restored. Then, users will be able to reset global.turbo_disabled by writing 0 to no_turbo which used to work before on systems with flipping "turbo disabled" bit. This guarantees the driver state to remain in sync, but READ_ONCE() annotations need to be added in two places where global.turbo_disabled is accessed locklessly, so modify the driver to make that happen. Intel-SIG: commit 350cbb5d2f67 cpufreq: intel_pstate: Check turbo_is_disabled() in store_no_turbo(). Backport intel_pstate driver update for 6.6 from 6.11 Fixes: 0940f1a8011f ("cpufreq: intel_pstate: Do not update global.turbo_disabled after initialization") Closes: https://lore.kernel.org/linux-pm/bf3ebf1571a4788e97daf861eb493c12d42639a3.camel@xry111.site Suggested-by: Srinivas Pandruvada Reported-by: Xi Ruoyao Tested-by: Xi Ruoyao Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b732b06bdc41..7d278117f143 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1298,12 +1298,17 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, no_turbo = !!clamp_t(int, input, 0, 1); - if (no_turbo == global.no_turbo) - goto unlock_driver; - - if (global.turbo_disabled) { - pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); + WRITE_ONCE(global.turbo_disabled, turbo_is_disabled()); + if (global.turbo_disabled && !no_turbo) { + pr_notice("Turbo disabled by BIOS or unavailable on processor\n"); count = -EPERM; + if (global.no_turbo) + goto unlock_driver; + else + no_turbo = 1; + } + + if (no_turbo == global.no_turbo) { goto unlock_driver; } @@ -1761,7 +1766,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1926,7 +1931,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; return val; -- Gitee From 4b45aa4ba871b7fbddfb9af41971386fe6ff58f6 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:08 -0700 Subject: [PATCH 20/23] x86/cpufeatures: Add HWP highest perf change feature flag commit 7ea81936b85317aee8a73cd35d7f9cd6ce654dee upstream. When CPUID[6].EAX[15] is set to 1, this CPU supports notification for HWP (Hardware P-states) highest performance change. Add a feature flag to check if the CPU supports HWP highest performance change. Intel-SIG: commit 7ea81936b853 x86/cpufeatures: Add HWP highest perf change feature flag. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624161109.1427640-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 72f580e338e8..e6fe253c1f46 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -381,6 +381,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* "" HWP Highest perf change */ #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -- Gitee From a117bb0680a28ac9edc1a2896eeeefc02d81f51c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:27:14 -0700 Subject: [PATCH 21/23] cpufreq: intel_pstate: Replace boot_cpu_has() commit acfc429e42f09524653af52998548cd9317892a6 upstream. Replace boot_cpu_has() with cpu_feature_enabled(). Intel-SIG: commit acfc429e42f0 cpufreq: intel_pstate: Replace boot_cpu_has(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624162714.1431182-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7d278117f143..45c631c90eaa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1628,7 +1628,7 @@ void notify_hwp_interrupt(void) unsigned long flags; u64 value; - if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; rdmsrl_safe(MSR_HWP_STATUS, &value); @@ -1657,7 +1657,7 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) unsigned long flags; bool cancel_work; - if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ -- Gitee From fb6235da0878095859700707b16ed7548148c0a3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:09 -0700 Subject: [PATCH 22/23] cpufreq: intel_pstate: Support highest performance change interrupt commit d845cd901b28f1b6c02a208b864fc3fc46d14536 upstream. On some systems, the HWP (Hardware P-states) highest performance level can change from the value set at boot-up. This behavior can lead to two issues: - The 'cpuinfo_max_freq' within the 'cpufreq' sysfs will not reflect the CPU's highest achievable performance. - Even if the CPU's highest performance level is increased after booting, the CPU may not reach the full expected performance. The availability of this feature is indicated by the CPUID instruction: if CPUID[6].EAX[15] is set to 1, the feature is supported. When supported, setting bit 2 of the MSR_HWP_INTERRUPT register enables notifications of the highest performance level changes. Therefore, as part of enabling the HWP interrupt, bit 2 of the MSR_HWP_INTERRUPT should also be set when this feature is supported. Upon a change in the highest performance level, a new HWP interrupt is generated, with bit 3 of the MSR_HWP_STATUS register set, and the MSR_HWP_CAPABILITIES register is updated with the new highest performance limit. The processing of the interrupt is the same as the guaranteed performance change. Notify change to cpufreq core and update MSR_HWP_REQUEST with new performance limits. The current driver implementation already takes care of the highest performance change as part of: commit dfeeedc1bf57 ("cpufreq: intel_pstate: Update cpuinfo.max_freq on HWP_CAP changes") For example: Before highest performance change interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3700000 cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3700000 After highest performance changes interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3900000 cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3900000 Intel-SIG: commit d845cd901b28 cpufreq: intel_pstate: Support highest performance change interrupt. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624161109.1427640-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 45c631c90eaa..e01c31ef39bc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1622,17 +1622,24 @@ static void intel_pstate_notify_work(struct work_struct *work) static DEFINE_RAW_SPINLOCK(hwp_notify_lock); static cpumask_t hwp_intr_enable_mask; +#define HWP_GUARANTEED_PERF_CHANGE_STATUS BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_STATUS BIT(3) + void notify_hwp_interrupt(void) { unsigned int this_cpu = smp_processor_id(); + u64 value, status_mask; unsigned long flags; - u64 value; if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; + status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS; + if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS; + rdmsrl_safe(MSR_HWP_STATUS, &value); - if (!(value & 0x01)) + if (!(value & status_mask)) return; raw_spin_lock_irqsave(&hwp_notify_lock, flags); @@ -1671,10 +1678,14 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) cancel_delayed_work_sync(&cpudata->hwp_notify_work); } +#define HWP_GUARANTEED_PERF_CHANGE_REQ BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_REQ BIT(2) + static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) { - /* Enable HWP notification interrupt for guaranteed performance change */ + /* Enable HWP notification interrupt for performance change */ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) { + u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ; unsigned long flags; raw_spin_lock_irqsave(&hwp_notify_lock, flags); @@ -1682,8 +1693,11 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ; + /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01); + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask); wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } } -- Gitee From 5a94e0dfa682f482f6d38a9a04bfb3ad83b1a190 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Kopper Date: Thu, 1 Aug 2024 13:41:50 -0300 Subject: [PATCH 23/23] cpufreq: intel_pstate: Update Balance performance EPP for Emerald Rapids commit 64a66f4a3c89b4602ee1e6cd23b28729fc4562b3 upstream. On Intel Emerald Rapids machines, we ship the Energy Performance Preference (EPP) default for balance_performance as 128. However, during an internal investigation together with Intel, we have determined that 32 is a more suitable value. This leads to significant improvements in both performance and energy: POV-Ray: 32% faster | 12% less energy OpenSSL: 12% faster | energy within 1% Build Linux Kernel: 29% faster | 18% less energy Therefore, we should move the default EPP for balance_performance to 32. This is in line with what has already been done for Sapphire Rapids. Intel-SIG: commit 64a66f4a3c89 cpufreq: intel_pstate: Update Balance performance EPP for Emerald Rapids. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Pedro Henrique Kopper Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/Zqu6zjVMoiXwROBI@capivara Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e01c31ef39bc..2e894d7ec591 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3406,6 +3406,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), {} }; -- Gitee