diff --git a/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch b/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch new file mode 100644 index 0000000000000000000000000000000000000000..933a8084e6f967fba91cfc8d8a7aa86218220b41 --- /dev/null +++ b/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch @@ -0,0 +1,100 @@ +From 36d76b69a2af34c3ebeb2a8e0550d7966cc5f58d Mon Sep 17 00:00:00 2001 +From: MayShao-oc +Date: Fri, 22 Aug 2025 15:21:45 +0800 +Subject: [PATCH 1/2] x86: Set preferred CPU features on the KH-40000 and + KX-7000 Zhaoxin processors + +Fix code formatting under the Zhaoxin branch and add comments for +different Zhaoxin models. + +Unaligned AVX loads are slower on KH-40000 and KX-7000, so disable +AVX_Fast_Unaligned_Load. + +Enable Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to +use sse2_unaligned versions of memset, strcpy and strcat. +--- + sysdeps/x86/cpu-features.c | 51 ++++++++++++++++++++++++++------------ + 1 file changed, 35 insertions(+), 16 deletions(-) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 60066824..740bf55f 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -644,39 +644,58 @@ disable_tsx: + + model += extended_model; + if (family == 0x6) +- { +- if (model == 0xf || model == 0x19) +- { ++ { ++ /* Tuning for older Zhaoxin processors. */ ++ if (model == 0xf || model == 0x19) ++ { + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); + +- cpu_features->preferred[index_arch_Slow_SSE4_2] +- |= bit_arch_Slow_SSE4_2; ++ cpu_features->preferred[index_arch_Slow_SSE4_2] ++ |= bit_arch_Slow_SSE4_2; + ++ /* Unaligned AVX loads are slower. */ + cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] +- &= ~bit_arch_AVX_Fast_Unaligned_Load; +- } +- } ++ &= ~bit_arch_AVX_Fast_Unaligned_Load; ++ } ++ } + else if (family == 0x7) +- { +- if (model == 0x1b) ++ { ++ switch (model) + { ++ /* Wudaokou microarch tuning.
*/ ++ case 0x1b: + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); + + cpu_features->preferred[index_arch_Slow_SSE4_2] +- |= bit_arch_Slow_SSE4_2; ++ |= bit_arch_Slow_SSE4_2; + + cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] +- &= ~bit_arch_AVX_Fast_Unaligned_Load; +- } +- else if (model == 0x3b) +- { ++ &= ~bit_arch_AVX_Fast_Unaligned_Load; ++ break; ++ ++ /* Lujiazui microarch tuning. */ ++ case 0x3b: + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); + + cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] +- &= ~bit_arch_AVX_Fast_Unaligned_Load; ++ &= ~bit_arch_AVX_Fast_Unaligned_Load; ++ break; ++ ++ /* Yongfeng and Shijidadao microarch tuning. */ ++ case 0x5b: ++ case 0x6b: ++ cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] ++ &= ~bit_arch_AVX_Fast_Unaligned_Load; ++ ++ /* To use sse2_unaligned versions of memset, strcpy and strcat. ++ */ ++ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] ++ |= (bit_arch_Prefer_No_VZEROUPPER ++ | bit_arch_Fast_Unaligned_Load); ++ break; + } + } + } +-- +2.34.1 + diff --git a/0002-x86-Set-shared-to-the-size-of-the-LLC-for-Zhaoxin-pr.patch b/0002-x86-Set-shared-to-the-size-of-the-LLC-for-Zhaoxin-pr.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1554184c5e665b16c8d0510fbd142a93f5967f9 --- /dev/null +++ b/0002-x86-Set-shared-to-the-size-of-the-LLC-for-Zhaoxin-pr.patch @@ -0,0 +1,42 @@ +From 2addf16e959222c8065ba001cdf78067e3e712d9 Mon Sep 17 00:00:00 2001 +From: MayShao-oc +Date: Sat, 23 Aug 2025 14:22:24 +0800 +Subject: [PATCH 2/2] x86: Set shared to the size of the LLC for Zhaoxin + processors + +Memcpy-ssse3.S uses half of the '__x86_shared_cache_size' (which is +equal to 'shared') as the threshold for using non-temporal +instructions. + +Currently 'shared' is set to the per-thread share of the LLC. This patch updates +the value to the size of the entire LLC on Zhaoxin processors.
+--- + sysdeps/x86/dl-cacheinfo.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index a88c31a1..5811b187 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -768,6 +768,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE); + + get_common_cache_info (&shared, &threads, core); ++ if (threads != 0) ++ shared = shared * threads; + } + else if (cpu_features->basic.kind == arch_kind_amd || cpu_features->basic.kind == arch_kind_hygon) + { +@@ -925,7 +927,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + shared = tunable_size; + + /* keep x86 to use the same non_temporal_threshold like glibc2.28 */ +- if (threads != 0 && cpu_features->basic.kind != arch_kind_hygon) ++ if (threads != 0 && cpu_features->basic.kind != arch_kind_hygon ++ && cpu_features->basic.kind != arch_kind_zhaoxin) + non_temporal_threshold *= threads; + + tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); +-- +2.34.1 + diff --git a/glibc.spec b/glibc.spec index 6d953f59cf0b9f17175881590e566f3a37835ba4..87fea7cb918791792476065ffec71502f683b91c 100644 --- a/glibc.spec +++ b/glibc.spec @@ -71,7 +71,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 170 +Release: 171 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -381,6 +381,8 @@ Patch9053: Use-THP-for-dynamic-shared-library.patch Patch9054: try-to-enable-system-thp-ability-when-LD_HUGEPAGE_LI.patch Patch9055: x86-Add-new-architecture-type-for-Hygon-processors.patch Patch9056: x86-Avoid-non_temporal_threshold-calculation-to-Hygo.patch +Patch9057: 0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch +Patch9058: 0002-x86-Set-shared-to-the-size-of-the-LLC-for-Zhaoxin-pr.patch Provides: ldconfig rtld(GNU_HASH) 
bundled(gnulib) @@ -1556,6 +1558,9 @@ fi %endif %changelog +* Thu Sep 04 2025 MayShao 2.34-171 +- x86: Set preferred CPU features and update Shared to LLC for Zhaoxin processors + * Wed Jul 30 2025 Qingqing Li 2.34-170 - posix: Fix double-free after allocation failure in regcomp (bug 33185) CVE-2025-8058