Diffstat (limited to 'pkgs/development/libraries/glibc/2.39-master.patch')
-rw-r--r--  pkgs/development/libraries/glibc/2.39-master.patch  10307
1 file changed, 10299 insertions(+), 8 deletions(-)
diff --git a/pkgs/development/libraries/glibc/2.39-master.patch b/pkgs/development/libraries/glibc/2.39-master.patch
index 3e0815573f5eb..88e629981e3ce 100644
--- a/pkgs/development/libraries/glibc/2.39-master.patch
+++ b/pkgs/development/libraries/glibc/2.39-master.patch
@@ -6,14 +6,6 @@ Date:   Wed Jan 31 02:12:43 2024 +0100
     
     Signed-off-by: Andreas K. Hüttel <dilfridge@gentoo.org>
 
-diff --git a/ADVISORIES b/ADVISORIES
-new file mode 100644
-index 0000000000..d4e33f2df3
---- /dev/null
-+++ b/ADVISORIES
-@@ -0,0 +1,2 @@
-+For the GNU C Library Security Advisories, see the git master branch:
-+https://sourceware.org/git/?p=glibc.git;a=tree;f=advisories;hb=HEAD
 diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001
 deleted file mode 100644
 index 3d19c91b6a..0000000000
@@ -564,3 +556,10302 @@ index 470676ab2b..2bc7124983 100644
 -#define TEST_FUNCTION do_test ()
 -#include "../test-skeleton.c"
 +#include <support/test-driver.c>
+
+commit 1b9c1a0047fb26a65a9b2a7b8cd977243f7d353c
+Author: Jakub Jelinek <jakub@redhat.com>
+Date:   Wed Jan 31 19:17:27 2024 +0100
+
+    Use gcc __builtin_stdc_* builtins in stdbit.h if possible
+    
+    The following patch uses the GCC 14 __builtin_stdc_* builtins in stdbit.h
+    for the type-generic macros, so that when compiled with GCC 14 or later,
+    it supports not just 8/16/32/64-bit unsigned integers, but also 128-bit
+    (if target supports them) and unsigned _BitInt (any supported precision).
+    And so that the macros don't expand arguments multiple times and can be
+    evaluated in constant expressions.
+    
+    The new testcase is gcc's gcc/testsuite/gcc.dg/builtin-stdc-bit-1.c
+    adjusted to test stdbit.h and the type-generic macros in there instead
+    of the builtins and adjusted to use glibc test framework rather than
+    gcc style tests with __builtin_abort ().
+    
+    Signed-off-by: Jakub Jelinek <jakub@redhat.com>
+    Reviewed-by: Joseph Myers <josmyers@redhat.com>
+    (cherry picked from commit da89496337b97e6a2aaf1e81d55cf998f6db1070)
+
+diff --git a/manual/stdbit.texi b/manual/stdbit.texi
+index fe41c671d8..6c75ed9a20 100644
+--- a/manual/stdbit.texi
++++ b/manual/stdbit.texi
+@@ -32,7 +32,13 @@ and @code{unsigned long long int}.  In addition, there is a
+ corresponding type-generic macro (not listed below), named the same as
+ the functions but without any suffix such as @samp{_uc}.  The
+ type-generic macro can only be used with an argument of an unsigned
+-integer type with a width of 8, 16, 32 or 64 bits.
++integer type with a width of 8, 16, 32 or 64 bits, or when using
++a compiler with support for
++@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}},
++etc.@:, built-in functions such as GCC 14.1 or later
++any unsigned integer type those built-in functions support.
++In GCC 14.1 that includes support for @code{unsigned __int128} and
++@code{unsigned _BitInt(@var{n})} if supported by the target.
+ 
+ @deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x})
+ @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x})
+diff --git a/stdlib/Makefile b/stdlib/Makefile
+index d587f054d1..9898cc5d8a 100644
+--- a/stdlib/Makefile
++++ b/stdlib/Makefile
+@@ -308,6 +308,7 @@ tests := \
+   tst-setcontext10 \
+   tst-setcontext11 \
+   tst-stdbit-Wconversion \
++  tst-stdbit-builtins \
+   tst-stdc_bit_ceil \
+   tst-stdc_bit_floor \
+   tst-stdc_bit_width \
+diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h
+index f334eb174d..2801590c63 100644
+--- a/stdlib/stdbit.h
++++ b/stdlib/stdbit.h
+@@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_leading_zeros(x)				\
++#if __glibc_has_builtin (__builtin_stdc_leading_zeros)
++# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x))
++#else
++# define stdc_leading_zeros(x)				\
+   (stdc_leading_zeros_ull (x)				\
+    - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline unsigned int
+@@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_leading_ones_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_leading_ones(x)					\
++#if __glibc_has_builtin (__builtin_stdc_leading_ones)
++# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x))
++#else
++# define stdc_leading_ones(x)					\
+   (stdc_leading_ones_ull ((unsigned long long int) (x)		\
+ 			  << 8 * (sizeof (0ULL) - sizeof (x))))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline unsigned int
+@@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_trailing_zeros(x)				\
++#if __glibc_has_builtin (__builtin_stdc_trailing_zeros)
++# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x))
++#else
++# define stdc_trailing_zeros(x)				\
+   (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x)	\
+    : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x)	\
+    : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x))	\
+    : stdc_trailing_zeros_uc (__pacify_uint8 (x)))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
+ static __always_inline unsigned int
+@@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
++#if __glibc_has_builtin (__builtin_stdc_trailing_ones)
++# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x))
++#else
++# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
+ static __always_inline unsigned int
+@@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_first_leading_zero(x)			\
++#if __glibc_has_builtin (__builtin_stdc_first_leading_zero)
++# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x))
++#else
++# define stdc_first_leading_zero(x)			\
+   (sizeof (x) == 8 ? stdc_first_leading_zero_ull (x)	\
+    : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x)	\
+    : sizeof (x) == 2 ? stdc_first_leading_zero_us (__pacify_uint16 (x))	\
+    : stdc_first_leading_zero_uc (__pacify_uint8 (x)))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline unsigned int
+@@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_first_leading_one(x)			\
++#if __glibc_has_builtin (__builtin_stdc_first_leading_one)
++# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x))
++#else
++# define stdc_first_leading_one(x)			\
+   (sizeof (x) == 8 ? stdc_first_leading_one_ull (x)	\
+    : sizeof (x) == 4 ? stdc_first_leading_one_ui (x)	\
+    : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x))	\
+    : stdc_first_leading_one_uc (__pacify_uint8 (x)))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline unsigned int
+@@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_first_trailing_zero(x)			\
++#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero)
++# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x))
++#else
++# define stdc_first_trailing_zero(x)			\
+   (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x)	\
+    : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x)	\
+    : sizeof (x) == 2 ? stdc_first_trailing_zero_us (__pacify_uint16 (x)) \
+    : stdc_first_trailing_zero_uc (__pacify_uint8 (x)))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
+ static __always_inline unsigned int
+@@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_first_trailing_one(x)			\
++#if __glibc_has_builtin (__builtin_stdc_first_trailing_one)
++# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x))
++#else
++# define stdc_first_trailing_one(x)			\
+   (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x)	\
+    : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x)	\
+    : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x))	\
+    : stdc_first_trailing_one_uc (__pacify_uint8 (x)))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
+ static __always_inline unsigned int
+@@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_count_zeros_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_count_zeros(x)				\
++#if __glibc_has_builtin (__builtin_stdc_count_zeros)
++# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x))
++#else
++# define stdc_count_zeros(x)				\
+   (stdc_count_zeros_ull (x)				\
+    - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
+ static __always_inline unsigned int
+@@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_count_ones_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_count_ones(x) (stdc_count_ones_ull (x))
++#if __glibc_has_builtin (__builtin_stdc_count_ones)
++# define stdc_count_ones(x) (__builtin_stdc_count_ones (x))
++#else
++# define stdc_count_ones(x) (stdc_count_ones_ull (x))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
+ static __always_inline unsigned int
+@@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x)
+ __extension__
+ extern bool stdc_has_single_bit_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_has_single_bit(x)				\
++#if __glibc_has_builtin (__builtin_stdc_has_single_bit)
++# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x))
++#else
++# define stdc_has_single_bit(x)				\
+   ((bool) (sizeof (x) <= sizeof (unsigned int)		\
+ 	   ? stdc_has_single_bit_ui (x)			\
+ 	   : stdc_has_single_bit_ull (x)))
++#endif
+ 
+ static __always_inline bool
+ __hsb64_inline (uint64_t __x)
+@@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x)
+ __extension__
+ extern unsigned int stdc_bit_width_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_bit_width(x) (stdc_bit_width_ull (x))
++#if __glibc_has_builtin (__builtin_stdc_bit_width)
++# define stdc_bit_width(x) (__builtin_stdc_bit_width (x))
++#else
++# define stdc_bit_width(x) (stdc_bit_width_ull (x))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline unsigned int
+@@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x)
+ __extension__
+ extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
++#if __glibc_has_builtin (__builtin_stdc_bit_floor)
++# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x))
++#else
++# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline uint64_t
+@@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x)
+ __extension__
+ extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x)
+      __THROW __attribute_const__;
+-#define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
++#if __glibc_has_builtin (__builtin_stdc_bit_ceil)
++# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x))
++#else
++# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
++#endif
+ 
+ #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
+ static __always_inline uint64_t
+diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c
+new file mode 100644
+index 0000000000..536841ca8a
+--- /dev/null
++++ b/stdlib/tst-stdbit-builtins.c
+@@ -0,0 +1,778 @@
++/* Test <stdbit.h> type-generic macros with compiler __builtin_stdc_* support.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdbit.h>
++#include <limits.h>
++#include <support/check.h>
++
++#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \
++    && __glibc_has_builtin (__builtin_stdc_leading_ones) \
++    && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \
++    && __glibc_has_builtin (__builtin_stdc_trailing_ones) \
++    && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \
++    && __glibc_has_builtin (__builtin_stdc_first_leading_one) \
++    && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \
++    && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \
++    && __glibc_has_builtin (__builtin_stdc_count_zeros) \
++    && __glibc_has_builtin (__builtin_stdc_count_ones) \
++    && __glibc_has_builtin (__builtin_stdc_has_single_bit) \
++    && __glibc_has_builtin (__builtin_stdc_bit_width) \
++    && __glibc_has_builtin (__builtin_stdc_bit_floor) \
++    && __glibc_has_builtin (__builtin_stdc_bit_ceil)
++
++# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__)
++#  define BITINT_MAXWIDTH __BITINT_MAXWIDTH__
++# endif
++
++typedef unsigned char uc;
++typedef unsigned short us;
++typedef unsigned int ui;
++typedef unsigned long int ul;
++typedef unsigned long long int ull;
++
++# define expr_has_type(e, t) _Generic (e, default : 0, t : 1)
++
++static int
++do_test (void)
++{
++  TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_leading_zeros ((us) ~0U), 0);
++  TEST_COMPARE (stdc_leading_zeros (~0U), 0);
++  TEST_COMPARE (stdc_leading_zeros (~0UL), 0);
++  TEST_COMPARE (stdc_leading_zeros (~0ULL), 0);
++  TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2);
++  TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4);
++  TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6);
++  TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8);
++  TEST_COMPARE (stdc_leading_zeros (512ULL),
++		sizeof (long long int) * CHAR_BIT - 10);
++  TEST_COMPARE (stdc_leading_ones ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_leading_ones ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_leading_ones (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1);
++  TEST_COMPARE (stdc_leading_ones (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1);
++  TEST_COMPARE (stdc_leading_ones (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1);
++  TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT);
++  TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2);
++  TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4);
++  TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6);
++  TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8);
++  TEST_COMPARE (stdc_leading_ones (~512ULL),
++		sizeof (long long int) * CHAR_BIT - 10);
++  TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros ((us) 0), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0);
++  TEST_COMPARE (stdc_trailing_zeros (~0U), 0);
++  TEST_COMPARE (stdc_trailing_zeros (~0UL), 0);
++  TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0);
++  TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1);
++  TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3);
++  TEST_COMPARE (stdc_trailing_zeros (32U), 5);
++  TEST_COMPARE (stdc_trailing_zeros (128UL), 7);
++  TEST_COMPARE (stdc_trailing_zeros (512ULL), 9);
++  TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_ones ((uc) 5), 1);
++  TEST_COMPARE (stdc_trailing_ones ((us) 15), 4);
++  TEST_COMPARE (stdc_trailing_ones (127U), 7);
++  TEST_COMPARE (stdc_trailing_ones (511UL), 9);
++  TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2),
++		sizeof (long long int) * CHAR_BIT - 2);
++  TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (0U), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (0UL), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (0ULL), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0);
++  TEST_COMPARE (stdc_first_leading_zero (~0U), 0);
++  TEST_COMPARE (stdc_first_leading_zero (~0UL), 0);
++  TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0);
++  TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1);
++  TEST_COMPARE (stdc_first_leading_zero ((us) ~15U),
++		sizeof (short) * CHAR_BIT - 3);
++  TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5);
++  TEST_COMPARE (stdc_first_leading_zero (~255UL),
++		sizeof (long int) * CHAR_BIT - 7);
++  TEST_COMPARE (stdc_first_leading_zero (~1023ULL),
++		sizeof (long long int) * CHAR_BIT - 9);
++  TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0UL), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1);
++  TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1);
++  TEST_COMPARE (stdc_first_leading_one (~0U), 1);
++  TEST_COMPARE (stdc_first_leading_one (~0UL), 1);
++  TEST_COMPARE (stdc_first_leading_one (~0ULL), 1);
++  TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1);
++  TEST_COMPARE (stdc_first_leading_one ((us) 9),
++		sizeof (short) * CHAR_BIT - 3);
++  TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5);
++  TEST_COMPARE (stdc_first_leading_one (130UL),
++		sizeof (long int) * CHAR_BIT - 7);
++  TEST_COMPARE (stdc_first_leading_one (512ULL),
++		sizeof (long long int) * CHAR_BIT - 9);
++  TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (0U), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (0UL), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0);
++  TEST_COMPARE (stdc_first_trailing_zero (~0U), 0);
++  TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0);
++  TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0);
++  TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1);
++  TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5);
++  TEST_COMPARE (stdc_first_trailing_zero (63U), 7);
++  TEST_COMPARE (stdc_first_trailing_zero (128UL), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (511ULL), 10);
++  TEST_COMPARE (stdc_first_trailing_one ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1);
++  TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~0U), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~0UL), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1);
++  TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3);
++  TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6);
++  TEST_COMPARE (stdc_first_trailing_one (127U), 1);
++  TEST_COMPARE (stdc_first_trailing_one (511UL), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13);
++  TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1);
++  TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1);
++  TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1);
++  TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0);
++  TEST_COMPARE (stdc_count_zeros (~0U), 0);
++  TEST_COMPARE (stdc_count_zeros (~0UL), 0);
++  TEST_COMPARE (stdc_count_zeros (~0ULL), 0);
++  TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1);
++  TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3);
++  TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4);
++  TEST_COMPARE (stdc_count_zeros (~1315UL), 5);
++  TEST_COMPARE (stdc_count_zeros (3363ULL),
++		sizeof (long long int) * CHAR_BIT - 6);
++  TEST_COMPARE (stdc_count_ones ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_count_ones ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_count_ones (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1);
++  TEST_COMPARE (stdc_count_ones (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1);
++  TEST_COMPARE (stdc_count_ones (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1);
++  TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT);
++  TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1);
++  TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3);
++  TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4);
++  TEST_COMPARE (stdc_count_ones (1315UL), 5);
++  TEST_COMPARE (stdc_count_ones (~3363ULL),
++		sizeof (long long int) * CHAR_BIT - 6);
++  TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0U), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1);
++  TEST_COMPARE (stdc_has_single_bit ((us) 8), 1);
++  TEST_COMPARE (stdc_has_single_bit (32U), 1);
++  TEST_COMPARE (stdc_has_single_bit (128UL), 1);
++  TEST_COMPARE (stdc_has_single_bit (512ULL), 1);
++  TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0);
++  TEST_COMPARE (stdc_has_single_bit ((us) 96), 0);
++  TEST_COMPARE (stdc_has_single_bit (513U), 0);
++  TEST_COMPARE (stdc_has_single_bit (1022UL), 0);
++  TEST_COMPARE (stdc_has_single_bit (12ULL), 0);
++  TEST_COMPARE (stdc_bit_width ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1);
++  TEST_COMPARE (stdc_bit_width ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1);
++  TEST_COMPARE (stdc_bit_width (0U), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1);
++  TEST_COMPARE (stdc_bit_width (0UL), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1);
++  TEST_COMPARE (stdc_bit_width (0ULL), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1);
++  TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT);
++  TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT);
++  TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT);
++  TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT);
++  TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1);
++  TEST_COMPARE (stdc_bit_width ((uc) 6), 3);
++  TEST_COMPARE (stdc_bit_width ((us) 12U), 4);
++  TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)),
++		sizeof (short) * CHAR_BIT - 5);
++  TEST_COMPARE (stdc_bit_width (137U), 8);
++  TEST_COMPARE (stdc_bit_width (269U), 9);
++  TEST_COMPARE (stdc_bit_width (39UL), 6);
++  TEST_COMPARE (stdc_bit_width (~0UL >> 2), sizeof (long int) * CHAR_BIT - 2);
++  TEST_COMPARE (stdc_bit_width (1023ULL), 10);
++  TEST_COMPARE (stdc_bit_width (1024ULL), 11);
++  TEST_COMPARE (stdc_bit_floor ((uc) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1);
++  TEST_COMPARE (stdc_bit_floor ((us) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1);
++  TEST_COMPARE (stdc_bit_floor (0U), 0U);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1);
++  TEST_COMPARE (stdc_bit_floor (0UL), 0UL);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1);
++  TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1);
++  TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_floor ((us) ~0U),
++		(1U << (sizeof (short) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_floor (~0UL),
++		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_floor (~0ULL),
++		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_floor ((uc) 4), 4);
++  TEST_COMPARE (stdc_bit_floor ((uc) 7), 4);
++  TEST_COMPARE (stdc_bit_floor ((us) 8U), 8);
++  TEST_COMPARE (stdc_bit_floor ((us) 31U), 16);
++  TEST_COMPARE (stdc_bit_floor (137U), 128U);
++  TEST_COMPARE (stdc_bit_floor (269U), 256U);
++  TEST_COMPARE (stdc_bit_floor (511UL), 256UL);
++  TEST_COMPARE (stdc_bit_floor (512UL), 512UL);
++  TEST_COMPARE (stdc_bit_floor (513UL), 512ULL);
++  TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL);
++  TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1);
++  TEST_COMPARE (stdc_bit_ceil ((us) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1);
++  TEST_COMPARE (stdc_bit_ceil (0U), 1U);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1);
++  TEST_COMPARE (stdc_bit_ceil (0UL), 1UL);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1);
++  TEST_COMPARE (stdc_bit_ceil (0ULL), 1ULL);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1);
++  TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0);
++  TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0);
++  TEST_COMPARE (stdc_bit_ceil (~0U), 0U);
++  TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL);
++  TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL);
++  TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
++		(1U << (sizeof (short) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
++		(1U << (sizeof (short) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (~0U >> 1),
++		(1U << (sizeof (int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)),
++		(1U << (sizeof (int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
++		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
++		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (1ULL
++			       << (sizeof (long long int) * CHAR_BIT - 1)),
++		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1),
++		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
++  TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1);
++  TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2);
++  TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4);
++  TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4);
++  TEST_COMPARE (stdc_bit_ceil (5U), 8U);
++  TEST_COMPARE (stdc_bit_ceil (269U), 512U);
++  TEST_COMPARE (stdc_bit_ceil (511UL), 512UL);
++  TEST_COMPARE (stdc_bit_ceil (512UL), 512UL);
++  TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL);
++  TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL);
++# ifdef __SIZEOF_INT128__
++  TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned __int128) 0), ui),
++		1);
++  TEST_COMPARE (stdc_leading_zeros (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui),
++		1);
++  TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui),
++		1);
++  TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1);
++  TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128)
++							 0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1);
++  TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui),
++		1);
++  TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_count_ones ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned __int128) 0), ui),
++		1);
++  TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0),
++		_Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0);
++  TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1);
++  TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0),
++		sizeof (__int128) * CHAR_BIT);
++  TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0),
++			       unsigned __int128), 1);
++  TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0)
++		!= ((unsigned __int128) 1) << (sizeof (__int128)
++					       * CHAR_BIT - 1), 0);
++  TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0),
++			       unsigned __int128), 1);
++  TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0);
++  TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1)
++		!= ((unsigned __int128) 1) << (sizeof (__int128)
++					       * CHAR_BIT - 1), 0);
++  TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0);
++# endif
++  uc a = 0;
++  TEST_COMPARE (stdc_bit_width (a++), 0);
++  TEST_COMPARE (a, 1);
++  ull b = 0;
++  TEST_COMPARE (stdc_bit_width (b++), 0);
++  TEST_COMPARE (b, 1);
++  TEST_COMPARE (stdc_bit_floor (a++), 1);
++  TEST_COMPARE (a, 2);
++  TEST_COMPARE (stdc_bit_floor (b++), 1);
++  TEST_COMPARE (b, 2);
++  TEST_COMPARE (stdc_bit_ceil (a++), 2);
++  TEST_COMPARE (a, 3);
++  TEST_COMPARE (stdc_bit_ceil (b++), 2);
++  TEST_COMPARE (b, 3);
++  TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2);
++  TEST_COMPARE (a, 4);
++  TEST_COMPARE (stdc_leading_zeros (b++),
++		sizeof (long long int) * CHAR_BIT - 2);
++  TEST_COMPARE (b, 4);
++  TEST_COMPARE (stdc_leading_ones (a++), 0);
++  TEST_COMPARE (a, 5);
++  TEST_COMPARE (stdc_leading_ones (b++), 0);
++  TEST_COMPARE (b, 5);
++  TEST_COMPARE (stdc_trailing_zeros (a++), 0);
++  TEST_COMPARE (a, 6);
++  TEST_COMPARE (stdc_trailing_zeros (b++), 0);
++  TEST_COMPARE (b, 6);
++  TEST_COMPARE (stdc_trailing_ones (a++), 0);
++  TEST_COMPARE (a, 7);
++  TEST_COMPARE (stdc_trailing_ones (b++), 0);
++  TEST_COMPARE (b, 7);
++  TEST_COMPARE (stdc_first_leading_zero (a++), 1);
++  TEST_COMPARE (a, 8);
++  TEST_COMPARE (stdc_first_leading_zero (b++), 1);
++  TEST_COMPARE (b, 8);
++  TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3);
++  TEST_COMPARE (a, 9);
++  TEST_COMPARE (stdc_first_leading_one (b++),
++		sizeof (long long int) * CHAR_BIT - 3);
++  TEST_COMPARE (b, 9);
++  TEST_COMPARE (stdc_first_trailing_zero (a++), 2);
++  TEST_COMPARE (a, 10);
++  TEST_COMPARE (stdc_first_trailing_zero (b++), 2);
++  TEST_COMPARE (b, 10);
++  TEST_COMPARE (stdc_first_trailing_one (a++), 2);
++  TEST_COMPARE (a, 11);
++  TEST_COMPARE (stdc_first_trailing_one (b++), 2);
++  TEST_COMPARE (b, 11);
++  TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3);
++  TEST_COMPARE (a, 12);
++  TEST_COMPARE (stdc_count_zeros (b++),
++		sizeof (long long int) * CHAR_BIT - 3);
++  TEST_COMPARE (b, 12);
++  TEST_COMPARE (stdc_count_ones (a++), 2);
++  TEST_COMPARE (a, 13);
++  TEST_COMPARE (stdc_count_ones (b++), 2);
++  TEST_COMPARE (b, 13);
++  TEST_COMPARE (stdc_has_single_bit (a++), 0);
++  TEST_COMPARE (a, 14);
++  TEST_COMPARE (stdc_has_single_bit (b++), 0);
++  TEST_COMPARE (b, 14);
++# ifdef BITINT_MAXWIDTH
++#  if BITINT_MAXWIDTH >= 64
++  TEST_COMPARE (stdc_leading_zeros (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1);
++  TEST_COMPARE (stdc_leading_zeros (1uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1);
++  TEST_COMPARE (stdc_leading_ones (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1);
++  TEST_COMPARE (stdc_leading_ones (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones (1uwb), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (1uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (1uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1);
++  TEST_COMPARE (stdc_count_zeros (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1);
++  TEST_COMPARE (stdc_count_zeros (1uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1);
++  TEST_COMPARE (stdc_count_ones (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1);
++  TEST_COMPARE (stdc_count_ones (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1);
++  TEST_COMPARE (stdc_has_single_bit (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1);
++  TEST_COMPARE (stdc_bit_width (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1);
++  TEST_COMPARE (stdc_bit_width (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1);
++  TEST_COMPARE (stdc_bit_floor (0uwb), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1);
++  TEST_COMPARE (stdc_bit_floor (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1);
++  TEST_COMPARE (stdc_bit_ceil (0uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1);
++  TEST_COMPARE (stdc_bit_ceil (1uwb), 1);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1);
++  unsigned _BitInt(1) c = 0;
++  TEST_COMPARE (stdc_bit_floor (c++), 0);
++  TEST_COMPARE (c, 1);
++  TEST_COMPARE (stdc_bit_floor (c++), 1);
++  TEST_COMPARE (c, 0);
++  TEST_COMPARE (stdc_bit_ceil (c++), 1);
++  TEST_COMPARE (c, 1);
++  TEST_COMPARE (stdc_bit_ceil (c++), 1);
++  TEST_COMPARE (c, 0);
++#  endif
++#  if BITINT_MAXWIDTH >= 512
++  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9);
++  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10);
++  TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9);
++  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10);
++  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8);
++  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9);
++  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8);
++  TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2),
++		373 - 2);
++  TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512))
++							0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373))
++							0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511),
++		512 - 8);
++  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023),
++		373 - 9);
++  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512))
++						       0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373))
++						       0), ui), 1);
++  TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1);
++  TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1);
++  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8);
++  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9);
++  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
++							  _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
++							  _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9);
++  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10);
++  TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512))
++							0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373))
++							0), ui), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(512)) 0), 1);
++  TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1);
++  TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175),
++		176);
++  TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311),
++		312);
++  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5);
++  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6);
++  TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5);
++  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6);
++  TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0),
++			       _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0),
++			       _Bool), 1);
++  TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279),
++		0);
++  TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0);
++  TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0),
++			       ui), 1);
++  TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512);
++  TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373);
++  TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405),
++		405 + 10);
++  TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242),
++		242 + 11);
++  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0),
++			       unsigned _BitInt(512)), 1);
++  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0),
++			       unsigned _BitInt(373)), 1);
++  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0)
++		!= ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
++  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0)
++		!= ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
++  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405)
++		!= (((unsigned _BitInt(512)) 256) << 405), 0);
++  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242)
++		!= (((unsigned _BitInt(512)) 512) << 242), 0);
++  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0),
++			       unsigned _BitInt(512)), 1);
++  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0);
++  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0),
++			       unsigned _BitInt(373)), 1);
++  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0);
++  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1))
++		!= ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
++  TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1)
++		!= ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405)
++		!= (((unsigned _BitInt(512)) 512) << 405), 0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242)
++		!= (((unsigned _BitInt(512)) 1024) << 242), 0);
++  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
++		0);
++  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0)
++		!= ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
++							       - 1), 0);
++  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511)
++				<< 405)
++		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0);
++  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
++				<< 405)
++		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
++  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0);
++  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
++		0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1)
++			       << (BITINT_MAXWIDTH - 1))
++		!= ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
++							       - 1), 0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
++			       << 405)
++		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
++  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513)
++			       << 405)
++		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0);
++#  endif
++# endif
++  return 0;
++}
++#else
++static int
++do_test (void)
++{
++  return 0;
++}
++#endif
++
++#include <support/test-driver.c>
+
+commit 71fcdba577884627c3ee4e43beb915da752efb1f
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Fri Mar 15 19:08:24 2024 +0100
+
+    linux: Use rseq area unconditionally in sched_getcpu (bug 31479)
+    
+    Originally, nptl/descr.h included <sys/rseq.h>, but we removed that
+    in commit 2c6b4b272e6b4d07303af25709051c3e96288f2d ("nptl:
+    Unconditionally use a 32-byte rseq area").  After that, it was
+    not ensured that the RSEQ_SIG macro was defined during sched_getcpu.c
+    compilation that provided a definition.  This commit always checks
+    the rseq area for CPU number information before using the other
+    approaches.
+    
+    This adds an unnecessary (but well-predictable) branch on
+    architectures which do not define RSEQ_SIG, but its cost is small
+    compared to the system call.  Most architectures that have vDSO
+    acceleration for getcpu also have rseq support.
+    
+    Fixes: 2c6b4b272e6b4d07303af25709051c3e96288f2d
+    Fixes: 1d350aa06091211863e41169729cee1bca39f72f
+    Reviewed-by: Arjun Shankar <arjun@redhat.com>
+    (cherry picked from commit 7a76f218677d149d8b7875b336722108239f7ee9)
+
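For reference, a minimal user-side sketch of the interface this change touches; the rseq fast path and the vDSO/syscall fallback in the hunk below are internal to glibc, and the caller only ever sees sched_getcpu ().

    /* Usage sketch, assuming _GNU_SOURCE for the declaration in <sched.h>.
       sched_getcpu returns the current CPU number, or -1 with errno set.  */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int
    main (void)
    {
      int cpu = sched_getcpu ();   /* rseq fast path, else vDSO/syscall.  */
      if (cpu < 0)
        perror ("sched_getcpu");
      else
        printf ("running on CPU %d\n", cpu);
      return 0;
    }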
+diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
+index dfb884568d..72a3360550 100644
+--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
++++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
+@@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void)
+   return r == -1 ? r : cpu;
+ }
+ 
+-#ifdef RSEQ_SIG
+ int
+ sched_getcpu (void)
+ {
+   int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+   return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
+ }
+-#else /* RSEQ_SIG */
+-int
+-sched_getcpu (void)
+-{
+-  return vsyscall_sched_getcpu ();
+-}
+-#endif /* RSEQ_SIG */
+
+commit ee7f4c54e19738c2c27d3846e1e9b3595c89221f
+Author: Manjunath Matti <mmatti@linux.ibm.com>
+Date:   Tue Mar 19 15:29:48 2024 -0500
+
+    powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
+    
+    This patch adds a new feature for powerpc.  In order to get faster
+    access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for
+    implementing __builtin_cpu_supports() in GCC) without the overhead of
+    reading them from the auxiliary vector, we now reserve space for them
+    in the TCB.
+    
+    Suggested-by: Peter Bergner <bergner@linux.ibm.com>
+    Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
+    (cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a)
+
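As a companion to the hunks below, a hedged sketch of how user code could query the new auxiliary vector entries.  The AT_HWCAP3/AT_HWCAP4 values (29/30) match the <elf.h> additions in this commit; the fallback defines are only needed with older headers, and getauxval simply returns 0 when the kernel does not supply an entry.

    #include <sys/auxv.h>
    #include <stdio.h>

    #ifndef AT_HWCAP3
    # define AT_HWCAP3 29
    #endif
    #ifndef AT_HWCAP4
    # define AT_HWCAP4 30
    #endif

    int
    main (void)
    {
      unsigned long hwcap3 = getauxval (AT_HWCAP3);  /* 0 if absent.  */
      unsigned long hwcap4 = getauxval (AT_HWCAP4);
      printf ("AT_HWCAP3: %#lx\nAT_HWCAP4: %#lx\n", hwcap3, hwcap4);
      return 0;
    }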
+diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
+index 7345ebc4e5..aaf67b87e8 100644
+--- a/elf/dl-diagnostics.c
++++ b/elf/dl-diagnostics.c
+@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ)
+   _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
+   _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
+   _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
++  _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3));
++  _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4));
+   _dl_diagnostics_print_labeled_string
+     ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
+   _dl_diagnostics_print_labeled_value
+diff --git a/elf/dl-support.c b/elf/dl-support.c
+index 2f502c8b0d..451932dd03 100644
+--- a/elf/dl-support.c
++++ b/elf/dl-support.c
+@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr;
+ size_t _dl_phnum;
+ uint64_t _dl_hwcap;
+ uint64_t _dl_hwcap2;
++uint64_t _dl_hwcap3;
++uint64_t _dl_hwcap4;
+ 
+ enum dso_sort_algorithm _dl_dso_sort_algo;
+ 
+diff --git a/elf/elf.h b/elf/elf.h
+index 455731663c..1c394c64cd 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -1234,6 +1234,10 @@ typedef struct
+ #define AT_RSEQ_FEATURE_SIZE	27	/* rseq supported feature size.  */
+ #define AT_RSEQ_ALIGN	28		/* rseq allocation alignment.  */
+ 
++/* More machine-dependent hints about processor capabilities.  */
++#define AT_HWCAP3	29		/* extension of AT_HWCAP.  */
++#define AT_HWCAP4	30		/* extension of AT_HWCAP.  */
++
+ #define AT_EXECFN	31		/* Filename of executable.  */
+ 
+ /* Pointer to the global system page used for system calls and other
+diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
+index 117c901ccc..50f58a60e3 100644
+--- a/sysdeps/generic/ldsodefs.h
++++ b/sysdeps/generic/ldsodefs.h
+@@ -646,6 +646,8 @@ struct rtld_global_ro
+   /* Mask for more hardware capabilities that are available on some
+      platforms.  */
+   EXTERN uint64_t _dl_hwcap2;
++  EXTERN uint64_t _dl_hwcap3;
++  EXTERN uint64_t _dl_hwcap4;
+ 
+   EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
+ 
+diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
+index a76bb6e5b0..8cf00aa7e3 100644
+--- a/sysdeps/powerpc/dl-procinfo.c
++++ b/sysdeps/powerpc/dl-procinfo.c
+@@ -38,6 +38,10 @@
+        needed.
+   */
+ 
++/* The total number of available bits (including those prior to
++   _DL_HWCAP_FIRST).  Some of these bits might not be used.  */
++#define _DL_HWCAP_COUNT         128
++
+ #ifndef PROCINFO_CLASS
+ # define PROCINFO_CLASS
+ #endif
+@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
+ #if !defined PROCINFO_DECL && defined SHARED
+   ._dl_powerpc_cap_flags
+ #else
+-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
++PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15]
+ #endif
+ #ifndef PROCINFO_DECL
+ = {
+diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
+index 68f4241095..f8cb343877 100644
+--- a/sysdeps/powerpc/dl-procinfo.h
++++ b/sysdeps/powerpc/dl-procinfo.h
+@@ -22,16 +22,17 @@
+ #include <ldsodefs.h>
+ #include <sysdep.h>	/* This defines the PPC_FEATURE[2]_* macros.  */
+ 
+-/* The total number of available bits (including those prior to
+-   _DL_HWCAP_FIRST).  Some of these bits might not be used.  */
+-#define _DL_HWCAP_COUNT		64
++/* Feature masks are all 32-bits in size.  */
++#define _DL_HWCAP_SIZE		32
+ 
+-/* Features started at bit 31 and decremented as new features were added.  */
+-#define _DL_HWCAP_LAST		31
++/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings.  */
++#define _DL_HWCAP2_OFFSET	_DL_HWCAP_SIZE
+ 
+-/* AT_HWCAP2 features started at bit 31 and decremented as new features were
+-   added.  HWCAP2 feature bits start at bit 0.  */
+-#define _DL_HWCAP2_LAST		31
++/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings.  */
++#define _DL_HWCAP3_OFFSET	(_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE)
++
++/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings.  */
++#define _DL_HWCAP4_OFFSET	(_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE)
+ 
+ /* These bits influence library search.  */
+ #define HWCAP_IMPORTANT		(PPC_FEATURE_HAS_ALTIVEC \
+@@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word)
+     case AT_HWCAP:
+       _dl_printf ("AT_HWCAP:            ");
+ 
+-      for (int i = 0; i <= _DL_HWCAP_LAST; ++i)
++      for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
+        if (word & (1 << i))
+          _dl_printf (" %s", _dl_hwcap_string (i));
+       break;
+     case AT_HWCAP2:
+       {
+-       unsigned int offset = _DL_HWCAP_LAST + 1;
+ 
+        _dl_printf ("AT_HWCAP2:           ");
+ 
+-        /* We have to go through them all because the kernel added the
+-          AT_HWCAP2 features starting with the high bits.  */
+-       for (int i = 0; i <= _DL_HWCAP2_LAST; ++i)
+-         if (word & (1 << i))
+-           _dl_printf (" %s", _dl_hwcap_string (offset + i));
++       /* We have to go through them all because the kernel added the
++	  AT_HWCAP2 features starting with the high bits.  */
++       for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
++	 if (word & (1 << i))
++	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i));
++       break;
++      }
++    case AT_HWCAP3:
++      {
++       _dl_printf ("AT_HWCAP3:           ");
++
++       /* We have to go through them all because the kernel added the
++	  AT_HWCAP3 features starting with the high bits.  */
++       for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
++	 if (word & (1 << i))
++	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i));
++       break;
++      }
++    case AT_HWCAP4:
++      {
++       _dl_printf ("AT_HWCAP4:           ");
++
++       /* We have to go through them all because the kernel added the
++	  AT_HWCAP4 features starting with the high bits.  */
++	 for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
++	 if (word & (1 << i))
++	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i));
+        break;
+       }
+     case AT_L1I_CACHEGEOMETRY:
+diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c
+index 76344f285a..f6fede15a7 100644
+--- a/sysdeps/powerpc/hwcapinfo.c
++++ b/sysdeps/powerpc/hwcapinfo.c
+@@ -31,7 +31,7 @@ void
+ __tcb_parse_hwcap_and_convert_at_platform (void)
+ {
+ 
+-  uint64_t h1, h2;
++  uint64_t h1, h2, h3, h4;
+ 
+   /* Read AT_PLATFORM string from auxv and convert it to a number.  */
+   __tcb.at_platform = _dl_string_platform (GLRO (dl_platform));
+@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
+   /* Read HWCAP and HWCAP2 from auxv.  */
+   h1 = GLRO (dl_hwcap);
+   h2 = GLRO (dl_hwcap2);
++  h3 = GLRO (dl_hwcap3);
++  h4 = GLRO (dl_hwcap4);
+ 
+   /* hwcap contains only the latest supported ISA, the code checks which is
+      and fills the previous supported ones.  */
+@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
+   else if (h1 & PPC_FEATURE_POWER5)
+     h1 |= PPC_FEATURE_POWER4;
+ 
+-  uint64_t array_hwcaps[] = { h1, h2 };
++  uint64_t array_hwcaps[] = { h1, h2, h3, h4 };
+   init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps);
+ 
+   /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
+      we can read both in a single load later.  */
+   __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff);
+-  __tcb.hwcap_extn = 0x0;
++
++  /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that
++     we can read both in a single load later.  */
++  __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff);
+ 
+ }
+ #if IS_IN (rtld)
+diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
+index e3d758b163..ea2a58ecb1 100644
+--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
++++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
+@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
+   GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
+   GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
+   GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
++  GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3];
++  GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4];
+   GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
+   GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
+   _dl_random = (void *) auxv_values[AT_RANDOM];
+diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
+index ad3692d738..e1b14e9eb3 100644
+--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
+@@ -197,6 +197,8 @@ _dl_show_auxv (void)
+ 	  [AT_SYSINFO_EHDR - 2] =	{ "SYSINFO_EHDR:      0x", hex },
+ 	  [AT_RANDOM - 2] =		{ "RANDOM:            0x", hex },
+ 	  [AT_HWCAP2 - 2] =		{ "HWCAP2:            0x", hex },
++	  [AT_HWCAP3 - 2] =		{ "HWCAP3:            0x", hex },
++	  [AT_HWCAP4 - 2] =		{ "HWCAP4:            0x", hex },
+ 	  [AT_MINSIGSTKSZ - 2] =	{ "MINSIGSTKSZ:       ", dec },
+ 	  [AT_L1I_CACHESIZE - 2] =	{ "L1I_CACHESIZE:     ", dec },
+ 	  [AT_L1I_CACHEGEOMETRY - 2] =	{ "L1I_CACHEGEOMETRY: 0x", hex },
+diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
+index 8e8a5ec2ea..a947d62db6 100644
+--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
++++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
+@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[])
+      which are set by __tcb_parse_hwcap_and_convert_at_platform.  */
+   cpu_features->hwcap = hwcaps[0];
+   cpu_features->hwcap2 = hwcaps[1];
++  cpu_features->hwcap3 = hwcaps[2];
++  cpu_features->hwcap4 = hwcaps[3];
+   /* Default is to use aligned memory access on optimized function unless
+      tunables is enable, since for this case user can explicit disable
+      unaligned optimizations.  */
+diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
+index 1294f0b601..e9eb6a13c8 100644
+--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
++++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
+@@ -26,6 +26,8 @@ struct cpu_features
+   bool use_cached_memopt;
+   unsigned long int hwcap;
+   unsigned long int hwcap2;
++  unsigned long int hwcap3;
++  unsigned long int hwcap4;
+ };
+ 
+ static const char hwcap_names[] = {
+diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+index a4705daf1c..6a00cd88cd 100644
+--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
++++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv,
+       case AT_HWCAP2:
+ 	_dl_hwcap2 = (unsigned long int) av->a_un.a_val;
+ 	break;
++      case AT_HWCAP3:
++	_dl_hwcap3 = (unsigned long int) av->a_un.a_val;
++	break;
++      case AT_HWCAP4:
++	_dl_hwcap4 = (unsigned long int) av->a_un.a_val;
++	break;
+       case AT_PLATFORM:
+ 	_dl_platform = (void *) av->a_un.a_val;
+ 	break;
+
+commit aad45c8ac30aa1072e54903ce6aead22702f244a
+Author: Amrita H S <amritahs@linux.ibm.com>
+Date:   Tue Mar 19 19:08:47 2024 -0500
+
+    powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
+    
+    The following three changes have been added to provide initial Power11 support.
+        1. Add the directories to hold Power11 files.
+        2. Add support to select Power11 libraries based on AT_PLATFORM.
+        3. Let submachine=power11 be set automatically.
+    
+    Reviewed-by: Florian Weimer <fweimer@redhat.com>
+    Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
+    (cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727)
+
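A hedged user-level sketch of the AT_PLATFORM check that drives the library selection described above; it mirrors the strcmp performed in the tst-glibc-hwcaps.c hunk further down.

    #include <elf.h>
    #include <sys/auxv.h>
    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      /* AT_PLATFORM is the same string the dynamic loader matches against
         "power11" to pick the power11 library directories.  */
      const char *platform = (const char *) getauxval (AT_PLATFORM);
      if (platform != NULL && strcmp (platform, "power11") == 0)
        puts ("running on a power11 platform");
      else
        printf ("AT_PLATFORM: %s\n", platform == NULL ? "(none)" : platform);
      return 0;
    }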
+diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
+index f8cb343877..b36697ba44 100644
+--- a/sysdeps/powerpc/dl-procinfo.h
++++ b/sysdeps/powerpc/dl-procinfo.h
+@@ -38,7 +38,7 @@
+ #define HWCAP_IMPORTANT		(PPC_FEATURE_HAS_ALTIVEC \
+ 				+ PPC_FEATURE_HAS_DFP)
+ 
+-#define _DL_PLATFORMS_COUNT	16
++#define _DL_PLATFORMS_COUNT	17
+ 
+ #define _DL_FIRST_PLATFORM	32
+ /* Mask to filter out platforms.  */
+@@ -62,6 +62,7 @@
+ #define PPC_PLATFORM_POWER8		13
+ #define PPC_PLATFORM_POWER9		14
+ #define PPC_PLATFORM_POWER10		15
++#define PPC_PLATFORM_POWER11		16
+ 
+ static inline const char *
+ __attribute__ ((unused))
+@@ -89,6 +90,11 @@ _dl_string_platform (const char *str)
+ 	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10;
+ 	      str++;
+ 	    }
++	  else if (str[1] == '1')
++	    {
++	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11;
++	      str++;
++	    }
+ 	  else
+ 	    return -1;
+ 	  break;
+diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies
+new file mode 100644
+index 0000000000..051cbe0f79
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc32/power11/Implies
+@@ -0,0 +1,2 @@
++powerpc/powerpc32/power10/fpu
++powerpc/powerpc32/power10
+diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
+new file mode 100644
+index 0000000000..58edb2861d
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc32/power10/fpu/multiarch
+diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
+new file mode 100644
+index 0000000000..c70f0428ba
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc32/power10/multiarch
+diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies
+new file mode 100644
+index 0000000000..de481d1c13
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/be/power11/Implies
+@@ -0,0 +1,2 @@
++powerpc/powerpc64/be/power10/fpu
++powerpc/powerpc64/be/power10
+diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
+new file mode 100644
+index 0000000000..dff0e13064
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/be/power10/fpu
+diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
+new file mode 100644
+index 0000000000..c3f259e009
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/be/power10/fpu/multiarch
+diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
+new file mode 100644
+index 0000000000..9491a394c9
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/be/power10/multiarch
+diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies
+new file mode 100644
+index 0000000000..e18182dcc1
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/le/power11/Implies
+@@ -0,0 +1,2 @@
++powerpc/powerpc64/le/power10/fpu
++powerpc/powerpc64/le/power10
+diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
+new file mode 100644
+index 0000000000..e41bd55684
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/le/power10/fpu
+diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
+new file mode 100644
+index 0000000000..c838d50931
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/le/power10/fpu/multiarch
+diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
+new file mode 100644
+index 0000000000..687248c3c2
+--- /dev/null
++++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
+@@ -0,0 +1 @@
++powerpc/powerpc64/le/power10/multiarch
+diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
+index 77465d9133..65d3e69303 100644
+--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
++++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
+@@ -36,9 +36,11 @@ compute_level (void)
+     return 9;
+   if (strcmp (platform, "power10") == 0)
+     return 10;
++  if (strcmp (platform, "power11") == 0)
++    return 11;
+   printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
+-  /* Assume that the new platform supports POWER10.  */
+-  return 10;
++  /* Assume that the new platform supports POWER11.  */
++  return 11;
+ }
+ 
+ static int
+diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure
+index 4de94089a3..9e5a07ab6d 100644
+--- a/sysdeps/powerpc/preconfigure
++++ b/sysdeps/powerpc/preconfigure
+@@ -58,7 +58,7 @@ fi
+ 
+     ;;
+ 
+-  a2|970|power[4-9]|power5x|power6+|power10)
++  a2|970|power[4-9]|power5x|power6+|power10|power11)
+     submachine=${archcpu}
+     if test ${libc_cv_cc_submachine+y}
+ then :
+diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac
+index 6c63bd8257..14b6dafd4a 100644
+--- a/sysdeps/powerpc/preconfigure.ac
++++ b/sysdeps/powerpc/preconfigure.ac
+@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in
+     AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
+     ;;
+ 
+-  a2|970|power[[4-9]]|power5x|power6+|power10)
++  a2|970|power[[4-9]]|power5x|power6+|power10|power11)
+     submachine=${archcpu}
+     AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
+     ;;
+
+commit 983f34a1252de3ca6f2305c211d86530ea42010e
+Author: caiyinyu <caiyinyu@loongson.cn>
+Date:   Mon Mar 11 16:07:48 2024 +0800
+
+    LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
+
+diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c
+index 9f05485236..7c0395fbb5 100644
+--- a/sysdeps/loongarch/fpu/e_scalbf.c
++++ b/sysdeps/loongarch/fpu/e_scalbf.c
+@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn)
+ 
+   return x;
+ }
+-libm_alias_finite (__ieee754_scalb, __scalb)
++libm_alias_finite (__ieee754_scalbf, __scalbf)
+
+commit 7fc8242bf87828c935ac5df5cafb9dc7ab635fd9
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Fri Feb 16 07:17:10 2024 -0800
+
+    x86-64: Save APX registers in ld.so trampoline
+    
+    Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
+    ld.so trampoline.  This fixes BZ #31371.
+    
+    Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386, which
+    will be used by the i386 _dl_tlsdesc_dynamic.
+    Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
+    
+    (cherry picked from commit dfb05f8e704edac70db38c4c8ee700769d91a413)
+
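To make the new mask concrete, a small standalone sketch that recomputes the x86-64 STATE_SAVE_MASK from the xstate feature IDs introduced below; the ID values are copied from the hunk, the program itself is illustrative only.

    #include <stdio.h>

    enum
    {
      X86_XSTATE_SSE_ID = 1,
      X86_XSTATE_AVX_ID = 2,
      X86_XSTATE_BNDREGS_ID = 3,
      X86_XSTATE_K_ID = 5,
      X86_XSTATE_ZMM_H_ID = 6,
      X86_XSTATE_ZMM_ID = 7,
      X86_XSTATE_APX_F_ID = 19
    };

    int
    main (void)
    {
      /* Same bit arithmetic as the x86-64 STATE_SAVE_MASK macro.  */
      unsigned int mask = (1u << X86_XSTATE_SSE_ID) | (1u << X86_XSTATE_AVX_ID)
			  | (1u << X86_XSTATE_BNDREGS_ID) | (1u << X86_XSTATE_K_ID)
			  | (1u << X86_XSTATE_ZMM_H_ID) | (1u << X86_XSTATE_ZMM_ID)
			  | (1u << X86_XSTATE_APX_F_ID);
      printf ("STATE_SAVE_MASK = %#x\n", mask);   /* Prints 0x800ee.  */
      return 0;
    }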
+diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
+index 85d0a8c943..837fd28734 100644
+--- a/sysdeps/x86/sysdep.h
++++ b/sysdeps/x86/sysdep.h
+@@ -21,14 +21,54 @@
+ 
+ #include <sysdeps/generic/sysdep.h>
+ 
++/* The extended state feature IDs in the state component bitmap.  */
++#define X86_XSTATE_X87_ID	0
++#define X86_XSTATE_SSE_ID	1
++#define X86_XSTATE_AVX_ID	2
++#define X86_XSTATE_BNDREGS_ID	3
++#define X86_XSTATE_BNDCFG_ID	4
++#define X86_XSTATE_K_ID		5
++#define X86_XSTATE_ZMM_H_ID	6
++#define X86_XSTATE_ZMM_ID	7
++#define X86_XSTATE_PKRU_ID	9
++#define X86_XSTATE_TILECFG_ID	17
++#define X86_XSTATE_TILEDATA_ID	18
++#define X86_XSTATE_APX_F_ID	19
++
++#ifdef __x86_64__
+ /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
+    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
+-   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
+-#define STATE_SAVE_OFFSET (8 * 7 + 8)
+-
+-/* Save SSE, AVX, AVX512, mask and bound registers.  */
+-#define STATE_SAVE_MASK \
+-  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
++   aligned to 16 bytes for fxsave and 64 bytes for xsave.
++
++   NB: It is non-zero because of the 128-byte red-zone.  Some registers
++   are saved on stack without adjusting stack pointer first.  When we
++   update stack pointer to allocate more space, we need to take the
++   red-zone into account.  */
++# define STATE_SAVE_OFFSET (8 * 7 + 8)
++
++/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
++   registers are mutually exclusive.  */
++# define STATE_SAVE_MASK		\
++  ((1 << X86_XSTATE_SSE_ID)		\
++   | (1 << X86_XSTATE_AVX_ID)		\
++   | (1 << X86_XSTATE_BNDREGS_ID)	\
++   | (1 << X86_XSTATE_K_ID)		\
++   | (1 << X86_XSTATE_ZMM_H_ID) 	\
++   | (1 << X86_XSTATE_ZMM_ID)		\
++   | (1 << X86_XSTATE_APX_F_ID))
++#else
++/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
++   doesn't have red-zone, use 0 here.  */
++# define STATE_SAVE_OFFSET 0
++
++/* Save SSE, AVX, AVX512, mask and bound registers.  */
++# define STATE_SAVE_MASK		\
++  ((1 << X86_XSTATE_SSE_ID)		\
++   | (1 << X86_XSTATE_AVX_ID)		\
++   | (1 << X86_XSTATE_BNDREGS_ID)	\
++   | (1 << X86_XSTATE_K_ID)		\
++   | (1 << X86_XSTATE_ZMM_H_ID))
++#endif
+ 
+ /* Constants for bits in __x86_string_control:  */
+ 
+
+commit a364304718725a31ab141936322855c76c73e35e
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Mon Feb 26 06:37:03 2024 -0800
+
+    x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
+    
+    Compiler generates the following instruction sequence for GNU2 dynamic
+    TLS access:
+    
+            leaq    tls_var@TLSDESC(%rip), %rax
+            call    *tls_var@TLSCALL(%rax)
+    
+    or
+    
+            leal    tls_var@TLSDESC(%ebx), %eax
+            call    *tls_var@TLSCALL(%eax)
+    
+    The CALL instruction is transparent to the compiler, which assumes
+    that all registers, except for EFLAGS and RAX/EAX, are unchanged
+    after the CALL.  When
+    _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow
+    path.  __tls_get_addr is a normal function which doesn't preserve any
+    caller-saved registers.  _dl_tlsdesc_dynamic saved and restored integer
+    caller-saved registers, but didn't preserve any other caller-saved
+    registers.  Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE,
+    XSAVE and XSAVEC to save and restore all caller-saved registers.  This
+    fixes BZ #31372.
+    
+    Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic)
+    to optimize elf_machine_runtime_setup.
+    Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
+    
+    (cherry picked from commit 0aac205a814a8511e98d02b91a8dc908f1c53cde)
+
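A hedged sketch of the source pattern behind the access sequence quoted above: any TLS variable in a shared object built with -mtls-dialect=gnu2 goes through the TLSDESC call, so the descriptor entry point (now the IFUNC-selected _dl_tlsdesc_dynamic variants) must preserve every caller-saved register.  The file and function names here are made up for illustration.

    /* tls-mod.c (hypothetical): build with
         gcc -shared -fPIC -mtls-dialect=gnu2 tls-mod.c -o tls-mod.so
       Each access to 'counter' compiles to the lea/call *@TLSCALL pair,
       and the compiler assumes all registers except RAX/EFLAGS survive
       that call.  */
    __thread long counter;

    long
    bump (long delta)
    {
      counter += delta;
      return counter;
    }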
+diff --git a/elf/Makefile b/elf/Makefile
+index 5d78b659ce..c5c37a9147 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -424,6 +424,7 @@ tests += \
+   tst-glibc-hwcaps-prepend \
+   tst-global1 \
+   tst-global2 \
++  tst-gnu2-tls2 \
+   tst-initfinilazyfail \
+   tst-initorder \
+   tst-initorder2 \
+@@ -846,6 +847,9 @@ modules-names += \
+   tst-filterobj-flt \
+   tst-finilazyfailmod \
+   tst-globalmod2 \
++  tst-gnu2-tls2mod0 \
++  tst-gnu2-tls2mod1 \
++  tst-gnu2-tls2mod2 \
+   tst-initlazyfailmod \
+   tst-initorder2a \
+   tst-initorder2b \
+@@ -3044,8 +3048,22 @@ $(objpfx)tst-tlsgap.out: \
+   $(objpfx)tst-tlsgap-mod0.so \
+   $(objpfx)tst-tlsgap-mod1.so \
+   $(objpfx)tst-tlsgap-mod2.so
++
++$(objpfx)tst-gnu2-tls2: $(shared-thread-library)
++$(objpfx)tst-gnu2-tls2.out: \
++  $(objpfx)tst-gnu2-tls2mod0.so \
++  $(objpfx)tst-gnu2-tls2mod1.so \
++  $(objpfx)tst-gnu2-tls2mod2.so
++
+ ifeq (yes,$(have-mtls-dialect-gnu2))
++# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
++# registers.  See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
++test-xfail-tst-gnu2-tls2 = yes
++
+ CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
+ CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
+ CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
++CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
++CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
++CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
+ endif
+diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c
+new file mode 100644
+index 0000000000..7ac04d7f33
+--- /dev/null
++++ b/elf/tst-gnu2-tls2.c
+@@ -0,0 +1,122 @@
++/* Test TLSDESC relocation.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <dlfcn.h>
++#include <pthread.h>
++#include <support/xdlfcn.h>
++#include <support/xthread.h>
++#include <support/check.h>
++#include <support/test-driver.h>
++#include "tst-gnu2-tls2.h"
++
++#ifndef IS_SUPPORTED
++# define IS_SUPPORTED() true
++#endif
++
++/* An architecture can define it to clobber caller-saved registers in
++   malloc below to verify that the implicit TLSDESC call won't change
++   caller-saved registers.  */
++#ifndef PREPARE_MALLOC
++# define PREPARE_MALLOC()
++#endif
++
++extern void * __libc_malloc (size_t);
++
++size_t malloc_counter = 0;
++
++void *
++malloc (size_t n)
++{
++  PREPARE_MALLOC ();
++  malloc_counter++;
++  return __libc_malloc (n);
++}
++
++static void *mod[3];
++#ifndef MOD
++# define MOD(i) "tst-gnu2-tls2mod" #i ".so"
++#endif
++static const char *modname[3] = { MOD(0), MOD(1), MOD(2) };
++#undef MOD
++
++static void
++open_mod (int i)
++{
++  mod[i] = xdlopen (modname[i], RTLD_LAZY);
++  printf ("open %s\n", modname[i]);
++}
++
++static void
++close_mod (int i)
++{
++  xdlclose (mod[i]);
++  mod[i] = NULL;
++  printf ("close %s\n", modname[i]);
++}
++
++static void
++access_mod (int i, const char *sym)
++{
++  struct tls var = { -1, -1, -1, -1 };
++  struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym);
++  /* Check that our malloc is called.  */
++  malloc_counter = 0;
++  struct tls *p = f (&var);
++  TEST_VERIFY (malloc_counter != 0);
++  printf ("access %s: %s() = %p\n", modname[i], sym, p);
++  TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
++  ++(p->a);
++}
++
++static void *
++start (void *arg)
++{
++  /* The DTV generation is at the last dlopen of mod0 and the
++     entry for mod1 is NULL.  */
++
++  open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS.  */
++
++  /* Force the slow path in GNU2 TLS descriptor call.  */
++  access_mod (1, "apply_tls");
++
++  return arg;
++}
++
++static int
++do_test (void)
++{
++  if (!IS_SUPPORTED ())
++    return EXIT_UNSUPPORTED;
++
++  open_mod (0);
++  open_mod (1);
++  open_mod (2);
++  close_mod (0);
++  close_mod (1); /* Create modid gap at mod1.  */
++  open_mod (0); /* Reuse modid of mod0, bump generation count.  */
++
++  /* Create a thread where DTV of mod1 is NULL.  */
++  pthread_t t = xpthread_create (NULL, start, NULL);
++  xpthread_join (t);
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
+new file mode 100644
+index 0000000000..77964a57a3
+--- /dev/null
++++ b/elf/tst-gnu2-tls2.h
+@@ -0,0 +1,36 @@
++/* Test TLSDESC relocation.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdint.h>
++
++struct tls
++{
++  int64_t a, b, c, d;
++};
++
++extern struct tls *apply_tls (struct tls *);
++
++/* An architecture can define them to verify that caller-saved
++   registers aren't clobbered by the implicit TLSDESC call.  */
++#ifndef BEFORE_TLSDESC_CALL
++# define BEFORE_TLSDESC_CALL()
++#endif
++
++#ifndef AFTER_TLSDESC_CALL
++# define AFTER_TLSDESC_CALL()
++#endif
+diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
+new file mode 100644
+index 0000000000..45556a0e17
+--- /dev/null
++++ b/elf/tst-gnu2-tls2mod0.c
+@@ -0,0 +1,31 @@
++/* DSO used by tst-gnu2-tls2.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-gnu2-tls2.h"
++
++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
++
++struct tls *
++apply_tls (struct tls *p)
++{
++  BEFORE_TLSDESC_CALL ();
++  tls_var0 = *p;
++  struct tls *ret = &tls_var0;
++  AFTER_TLSDESC_CALL ();
++  return ret;
++}
+diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
+new file mode 100644
+index 0000000000..e10b9dbc0a
+--- /dev/null
++++ b/elf/tst-gnu2-tls2mod1.c
+@@ -0,0 +1,31 @@
++/* DSO used by tst-gnu2-tls2.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-gnu2-tls2.h"
++
++__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
++
++struct tls *
++apply_tls (struct tls *p)
++{
++  BEFORE_TLSDESC_CALL ();
++  tls_var1[1] = *p;
++  struct tls *ret = &tls_var1[1];
++  AFTER_TLSDESC_CALL ();
++  return ret;
++}
+diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
+new file mode 100644
+index 0000000000..141af51e55
+--- /dev/null
++++ b/elf/tst-gnu2-tls2mod2.c
+@@ -0,0 +1,31 @@
++/* DSO used by tst-gnu2-tls2.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include "tst-gnu2-tls2.h"
++
++__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
++
++struct tls *
++apply_tls (struct tls *p)
++{
++  BEFORE_TLSDESC_CALL ();
++  tls_var2 = *p;
++  struct tls *ret = &tls_var2;
++  AFTER_TLSDESC_CALL ();
++  return ret;
++}
+diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
+index fc1ef96587..50d74fe6e9 100644
+--- a/sysdeps/i386/dl-machine.h
++++ b/sysdeps/i386/dl-machine.h
+@@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n",
+ 		  {
+ 		    td->arg = _dl_make_tlsdesc_dynamic
+ 		      (sym_map, sym->st_value + (ElfW(Word))td->arg);
+-		    td->entry = _dl_tlsdesc_dynamic;
++		    td->entry = GLRO(dl_x86_tlsdesc_dynamic);
+ 		  }
+ 		else
+ #  endif
+diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
+new file mode 100644
+index 0000000000..3627028577
+--- /dev/null
++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
+@@ -0,0 +1,190 @@
++/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
++   Copyright (C) 2004-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#undef REGISTER_SAVE_AREA
++
++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
++# error STATE_SAVE_ALIGNMENT must be multiple of 16
++#endif
++
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++# ifdef USE_FNSAVE
++#  error USE_FNSAVE shouldn't be defined
++# endif
++# ifdef USE_FXSAVE
++/* Use fxsave to save all registers.  */
++#  define REGISTER_SAVE_AREA	512
++# endif
++#else
++# ifdef USE_FNSAVE
++/* Use fnsave to save x87 FPU stack registers.  */
++#  define REGISTER_SAVE_AREA	108
++# else
++#  ifndef USE_FXSAVE
++#   error USE_FXSAVE must be defined
++#  endif
++/* Use fxsave to save all registers.  Add 12 bytes to align the stack
++   to 16 bytes.  */
++#  define REGISTER_SAVE_AREA	(512 + 12)
++# endif
++#endif
++
++	.hidden _dl_tlsdesc_dynamic
++	.global	_dl_tlsdesc_dynamic
++	.type	_dl_tlsdesc_dynamic,@function
++
++     /* This function is used for symbols that need dynamic TLS.
++
++	%eax points to the TLS descriptor, such that 0(%eax) points to
++	_dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
++	tlsdesc_dynamic_arg object.  It must return in %eax the offset
++	between the thread pointer and the object denoted by the
++	argument, without clobbering any registers.
++
++	The assembly code that follows is a rendition of the following
++	C code, hand-optimized a little bit.
++
++ptrdiff_t
++__attribute__ ((__regparm__ (1)))
++_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
++{
++  struct tlsdesc_dynamic_arg *td = tdp->arg;
++  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
++  if (__builtin_expect (td->gen_count <= dtv[0].counter
++			&& (dtv[td->tlsinfo.ti_module].pointer.val
++			    != TLS_DTV_UNALLOCATED),
++			1))
++    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
++      - __thread_pointer;
++
++  return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
++}
++*/
++	cfi_startproc
++	.align 16
++_dl_tlsdesc_dynamic:
++	/* Like all TLS resolvers, preserve call-clobbered registers.
++	   We need two scratch regs anyway.  */
++	subl	$32, %esp
++	cfi_adjust_cfa_offset (32)
++	movl	%ecx, 20(%esp)
++	movl	%edx, 24(%esp)
++	movl	TLSDESC_ARG(%eax), %eax
++	movl	%gs:DTV_OFFSET, %edx
++	movl	TLSDESC_GEN_COUNT(%eax), %ecx
++	cmpl	(%edx), %ecx
++	ja	2f
++	movl	TLSDESC_MODID(%eax), %ecx
++	movl	(%edx,%ecx,8), %edx
++	cmpl	$-1, %edx
++	je	2f
++	movl	TLSDESC_MODOFF(%eax), %eax
++	addl	%edx, %eax
++1:
++	movl	20(%esp), %ecx
++	subl	%gs:0, %eax
++	movl	24(%esp), %edx
++	addl	$32, %esp
++	cfi_adjust_cfa_offset (-32)
++	ret
++	.p2align 4,,7
++2:
++	cfi_adjust_cfa_offset (32)
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	movl	%ebx, -28(%esp)
++	movl	%esp, %ebx
++	cfi_def_cfa_register(%ebx)
++	and	$-STATE_SAVE_ALIGNMENT, %esp
++#endif
++#ifdef REGISTER_SAVE_AREA
++	subl	$REGISTER_SAVE_AREA, %esp
++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
++	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
++# endif
++#else
++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
++#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
++# endif
++	/* Allocate stack space of the required size to save the state.  */
++	LOAD_PIC_REG (cx)
++	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
++#endif
++#ifdef USE_FNSAVE
++	fnsave	(%esp)
++#elif defined USE_FXSAVE
++	fxsave	(%esp)
++#else
++	/* Save the argument for ___tls_get_addr in EAX.  */
++	movl	%eax, %ecx
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	/* Clear the XSAVE Header.  */
++# ifdef USE_XSAVE
++	movl	%edx, (512)(%esp)
++	movl	%edx, (512 + 4 * 1)(%esp)
++	movl	%edx, (512 + 4 * 2)(%esp)
++	movl	%edx, (512 + 4 * 3)(%esp)
++# endif
++	movl	%edx, (512 + 4 * 4)(%esp)
++	movl	%edx, (512 + 4 * 5)(%esp)
++	movl	%edx, (512 + 4 * 6)(%esp)
++	movl	%edx, (512 + 4 * 7)(%esp)
++	movl	%edx, (512 + 4 * 8)(%esp)
++	movl	%edx, (512 + 4 * 9)(%esp)
++	movl	%edx, (512 + 4 * 10)(%esp)
++	movl	%edx, (512 + 4 * 11)(%esp)
++	movl	%edx, (512 + 4 * 12)(%esp)
++	movl	%edx, (512 + 4 * 13)(%esp)
++	movl	%edx, (512 + 4 * 14)(%esp)
++	movl	%edx, (512 + 4 * 15)(%esp)
++# ifdef USE_XSAVE
++	xsave	(%esp)
++# else
++	xsavec	(%esp)
++# endif
++	/* Restore the argument for ___tls_get_addr in EAX.  */
++	movl	%ecx, %eax
++#endif
++	call	HIDDEN_JUMPTARGET (___tls_get_addr)
++	/* Get register content back.  */
++#ifdef USE_FNSAVE
++	frstor	(%esp)
++#elif defined USE_FXSAVE
++	fxrstor	(%esp)
++#else
++	/* Save and restore ___tls_get_addr return value stored in EAX.  */
++	movl	%eax, %ecx
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	xrstor	(%esp)
++	movl	%ecx, %eax
++#endif
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	mov	%ebx, %esp
++	cfi_def_cfa_register(%esp)
++	movl	-28(%esp), %ebx
++	cfi_restore(%ebx)
++#else
++	addl	$REGISTER_SAVE_AREA, %esp
++	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
++#endif
++	jmp	1b
++	cfi_endproc
++	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
++
++#undef STATE_SAVE_ALIGNMENT
+diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
+index 90d93caa0c..f002feee56 100644
+--- a/sysdeps/i386/dl-tlsdesc.S
++++ b/sysdeps/i386/dl-tlsdesc.S
+@@ -18,8 +18,27 @@
+ 
+ #include <sysdep.h>
+ #include <tls.h>
++#include <cpu-features-offsets.h>
++#include <features-offsets.h>
+ #include "tlsdesc.h"
+ 
++#ifndef DL_STACK_ALIGNMENT
++/* Due to GCC bug:
++
++   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
++
++   __tls_get_addr may be called with 4-byte stack alignment.  Although
++   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
++   that stack will be always aligned at 16 bytes.  */
++# define DL_STACK_ALIGNMENT 4
++#endif
++
++/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
++   stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr.  */
++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
++  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
++   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
++
+ 	.text
+ 
+      /* This function is used to compute the TP offset for symbols in
+@@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak:
+ 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+ 
+ #ifdef SHARED
+-	.hidden _dl_tlsdesc_dynamic
+-	.global	_dl_tlsdesc_dynamic
+-	.type	_dl_tlsdesc_dynamic,@function
+-
+-     /* This function is used for symbols that need dynamic TLS.
+-
+-	%eax points to the TLS descriptor, such that 0(%eax) points to
+-	_dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
+-	tlsdesc_dynamic_arg object.  It must return in %eax the offset
+-	between the thread pointer and the object denoted by the
+-	argument, without clobbering any registers.
+-
+-	The assembly code that follows is a rendition of the following
+-	C code, hand-optimized a little bit.
+-
+-ptrdiff_t
+-__attribute__ ((__regparm__ (1)))
+-_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+-{
+-  struct tlsdesc_dynamic_arg *td = tdp->arg;
+-  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+-  if (__builtin_expect (td->gen_count <= dtv[0].counter
+-			&& (dtv[td->tlsinfo.ti_module].pointer.val
+-			    != TLS_DTV_UNALLOCATED),
+-			1))
+-    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+-      - __thread_pointer;
+-
+-  return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+-}
+-*/
+-	cfi_startproc
+-	.align 16
+-_dl_tlsdesc_dynamic:
+-	/* Like all TLS resolvers, preserve call-clobbered registers.
+-	   We need two scratch regs anyway.  */
+-	subl	$28, %esp
+-	cfi_adjust_cfa_offset (28)
+-	movl	%ecx, 20(%esp)
+-	movl	%edx, 24(%esp)
+-	movl	TLSDESC_ARG(%eax), %eax
+-	movl	%gs:DTV_OFFSET, %edx
+-	movl	TLSDESC_GEN_COUNT(%eax), %ecx
+-	cmpl	(%edx), %ecx
+-	ja	.Lslow
+-	movl	TLSDESC_MODID(%eax), %ecx
+-	movl	(%edx,%ecx,8), %edx
+-	cmpl	$-1, %edx
+-	je	.Lslow
+-	movl	TLSDESC_MODOFF(%eax), %eax
+-	addl	%edx, %eax
+-.Lret:
+-	movl	20(%esp), %ecx
+-	subl	%gs:0, %eax
+-	movl	24(%esp), %edx
+-	addl	$28, %esp
+-	cfi_adjust_cfa_offset (-28)
+-	ret
+-	.p2align 4,,7
+-.Lslow:
+-	cfi_adjust_cfa_offset (28)
+-	call	HIDDEN_JUMPTARGET (___tls_get_addr)
+-	jmp	.Lret
+-	cfi_endproc
+-	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
++# define USE_FNSAVE
++# define MINIMUM_ALIGNMENT	4
++# define STATE_SAVE_ALIGNMENT	4
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fnsave
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef MINIMUM_ALIGNMENT
++# undef USE_FNSAVE
++
++# define MINIMUM_ALIGNMENT	16
++
++# define USE_FXSAVE
++# define STATE_SAVE_ALIGNMENT	16
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_FXSAVE
++
++# define USE_XSAVE
++# define STATE_SAVE_ALIGNMENT	64
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsave
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_XSAVE
++
++# define USE_XSAVEC
++# define STATE_SAVE_ALIGNMENT	64
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsavec
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_XSAVEC
+ #endif /* SHARED */
+diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
+index 4d50b327b5..992aabe43e 100644
+--- a/sysdeps/x86/Makefile
++++ b/sysdeps/x86/Makefile
+@@ -1,5 +1,5 @@
+ ifeq ($(subdir),csu)
+-gen-as-const-headers += cpu-features-offsets.sym
++gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym
+ endif
+ 
+ ifeq ($(subdir),elf)
+@@ -86,6 +86,11 @@ endif
+ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F
+ tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
+ tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
++
++CFLAGS-tst-gnu2-tls2.c += -msse
++CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
++CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
++CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
+ endif
+ 
+ ifeq ($(subdir),math)
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index 25e6622a79..835113b42f 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -27,8 +27,13 @@
+ extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
+   attribute_hidden;
+ 
+-#if defined SHARED && defined __x86_64__
+-# include <dl-plt-rewrite.h>
++#if defined SHARED
++extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
++extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
++extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
++
++# ifdef __x86_64__
++#  include <dl-plt-rewrite.h>
+ 
+ static void
+ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
+@@ -47,6 +52,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
+ 		 : plt_rewrite_jmp);
+     }
+ }
++# else
++extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
++# endif
++#endif
++
++#ifdef __x86_64__
++extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
++extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
++extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
+ #endif
+ 
+ #ifdef __LP64__
+@@ -1130,6 +1144,44 @@ no_cpuid:
+ 	       TUNABLE_CALLBACK (set_x86_shstk));
+ #endif
+ 
++  if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
++    {
++      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
++	{
++#ifdef __x86_64__
++	  GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
++#endif
++#ifdef SHARED
++	  GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
++#endif
++	}
++      else
++	{
++#ifdef __x86_64__
++	  GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
++#endif
++#ifdef SHARED
++	  GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
++#endif
++	}
++    }
++  else
++    {
++#ifdef __x86_64__
++      GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
++# ifdef SHARED
++      GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
++# endif
++#else
++# ifdef SHARED
++      if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
++	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
++      else
++	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
++# endif
++#endif
++    }
++
+ #ifdef SHARED
+ # ifdef __x86_64__
+   TUNABLE_GET (plt_rewrite, tunable_val_t *,
+diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c
+index ee957b4d70..5920d4b320 100644
+--- a/sysdeps/x86/dl-procinfo.c
++++ b/sysdeps/x86/dl-procinfo.c
+@@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9]
+ #else
+ ,
+ #endif
++
++#if defined SHARED && !IS_IN (ldconfig)
++# if !defined PROCINFO_DECL
++  ._dl_x86_tlsdesc_dynamic
++# else
++PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic
++# endif
++# ifndef PROCINFO_DECL
++= NULL
++# endif
++# ifdef PROCINFO_DECL
++;
++# else
++,
++# endif
++#endif
+diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym
+similarity index 89%
+rename from sysdeps/x86_64/features-offsets.sym
+rename to sysdeps/x86/features-offsets.sym
+index 9e4be3393a..77e990c705 100644
+--- a/sysdeps/x86_64/features-offsets.sym
++++ b/sysdeps/x86/features-offsets.sym
+@@ -3,4 +3,6 @@
+ #include <ldsodefs.h>
+ 
+ RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
++#ifdef __x86_64__
+ RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
++#endif
+diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
+index 837fd28734..485cad9c02 100644
+--- a/sysdeps/x86/sysdep.h
++++ b/sysdeps/x86/sysdep.h
+@@ -70,6 +70,12 @@
+    | (1 << X86_XSTATE_ZMM_H_ID))
+ #endif
+ 
++/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
++   Compiler assumes that all registers, including x87 FPU stack registers,
++   are unchanged after CALL, except for EFLAGS and RAX/EAX.  */
++#define TLSDESC_CALL_STATE_SAVE_MASK	\
++  (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
++
+ /* Constants for bits in __x86_string_control:  */
+ 
+ /* Avoid short distance REP MOVSB.  */
+diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
+new file mode 100644
+index 0000000000..de900a423b
+--- /dev/null
++++ b/sysdeps/x86/tst-gnu2-tls2.c
+@@ -0,0 +1,20 @@
++#ifndef __x86_64__
++#include <sys/platform/x86.h>
++
++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
++#endif
++
++/* Clear XMM0...XMM7  */
++#define PREPARE_MALLOC()				\
++{							\
++  asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" );	\
++  asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" );	\
++  asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" );	\
++  asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" );	\
++  asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" );	\
++  asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" );	\
++  asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" );	\
++  asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" );	\
++}
++
++#include <elf/tst-gnu2-tls2.c>
+diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
+index 90f4ecfd26..e8babc9a4e 100644
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt
+ endif
+ 
+ ifeq ($(subdir),csu)
+-gen-as-const-headers += features-offsets.sym link-defines.sym
++gen-as-const-headers += link-defines.sym
+ endif
+ 
+ ifeq ($(subdir),gmon)
+diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
+index 6d605d0d32..ff5d45f7cb 100644
+--- a/sysdeps/x86_64/dl-machine.h
++++ b/sysdeps/x86_64/dl-machine.h
+@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ 			   int lazy, int profile)
+ {
+   Elf64_Addr *got;
+-  extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
+-  extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
+   extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
+@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+       /* Identify this shared object.  */
+       *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
+ 
+-      const struct cpu_features* cpu_features = __get_cpu_features ();
+-
+ #ifdef SHARED
+       /* The got[2] entry contains the address of a function which gets
+ 	 called to get the address of a so far unresolved function and
+@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ 	 end in this function.  */
+       if (__glibc_unlikely (profile))
+ 	{
++	  const struct cpu_features* cpu_features = __get_cpu_features ();
+ 	  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
+ 	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
+ 	  else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
+@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ 	  /* This function will get called to fix up the GOT entry
+ 	     indicated by the offset on the stack, and then jump to
+ 	     the resolved address.  */
+-	  if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
+-	      || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+-	    *(ElfW(Addr) *) (got + 2)
+-	      = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
+-		 ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
+-		 : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
+-	  else
+-	    *(ElfW(Addr) *) (got + 2)
+-	      = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
++	  *(ElfW(Addr) *) (got + 2)
++	    = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
+ 	}
+     }
+ 
+@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
+ 		  {
+ 		    td->arg = _dl_make_tlsdesc_dynamic
+ 		      (sym_map, sym->st_value + reloc->r_addend);
+-		    td->entry = _dl_tlsdesc_dynamic;
++		    td->entry = GLRO(dl_x86_tlsdesc_dynamic);
+ 		  }
+ 		else
+ #  endif
+diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
+index 4d1d790fbb..06637a8154 100644
+--- a/sysdeps/x86_64/dl-procinfo.c
++++ b/sysdeps/x86_64/dl-procinfo.c
+@@ -41,5 +41,21 @@
+ 
+ #include <sysdeps/x86/dl-procinfo.c>
+ 
++#if !IS_IN (ldconfig)
++# if !defined PROCINFO_DECL && defined SHARED
++  ._dl_x86_64_runtime_resolve
++# else
++PROCINFO_CLASS void * _dl_x86_64_runtime_resolve
++# endif
++# ifndef PROCINFO_DECL
++= NULL
++# endif
++# if !defined SHARED || defined PROCINFO_DECL
++;
++# else
++,
++# endif
++#endif
++
+ #undef PROCINFO_DECL
+ #undef PROCINFO_CLASS
+diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
+new file mode 100644
+index 0000000000..0c2e8d5320
+--- /dev/null
++++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
+@@ -0,0 +1,166 @@
++/* Thread-local storage handling in the ELF dynamic linker.  x86_64 version.
++   Copyright (C) 2004-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#ifndef SECTION
++# define SECTION(p)	p
++#endif
++
++#undef REGISTER_SAVE_AREA
++#undef LOCAL_STORAGE_AREA
++#undef BASE
++
++#include "dl-trampoline-state.h"
++
++	.section SECTION(.text),"ax",@progbits
++
++	.hidden _dl_tlsdesc_dynamic
++	.global	_dl_tlsdesc_dynamic
++	.type	_dl_tlsdesc_dynamic,@function
++
++     /* %rax points to the TLS descriptor, such that 0(%rax) points to
++	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
++	tlsdesc_dynamic_arg object.  It must return in %rax the offset
++	between the thread pointer and the object denoted by the
++	argument, without clobbering any registers.
++
++	The assembly code that follows is a rendition of the following
++	C code, hand-optimized a little bit.
++
++ptrdiff_t
++_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
++{
++  struct tlsdesc_dynamic_arg *td = tdp->arg;
++  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
++  if (__builtin_expect (td->gen_count <= dtv[0].counter
++			&& (dtv[td->tlsinfo.ti_module].pointer.val
++			    != TLS_DTV_UNALLOCATED),
++			1))
++    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
++      - __thread_pointer;
++
++  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
++}
++*/
++	cfi_startproc
++	.align 16
++_dl_tlsdesc_dynamic:
++	_CET_ENDBR
++	/* Preserve call-clobbered registers that we modify.
++	   We need two scratch regs anyway.  */
++	movq	%rsi, -16(%rsp)
++	mov	%fs:DTV_OFFSET, %RSI_LP
++	movq	%rdi, -8(%rsp)
++	movq	TLSDESC_ARG(%rax), %rdi
++	movq	(%rsi), %rax
++	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
++	ja	2f
++	movq	TLSDESC_MODID(%rdi), %rax
++	salq	$4, %rax
++	movq	(%rax,%rsi), %rax
++	cmpq	$-1, %rax
++	je	2f
++	addq	TLSDESC_MODOFF(%rdi), %rax
++1:
++	movq	-16(%rsp), %rsi
++	sub	%fs:0, %RAX_LP
++	movq	-8(%rsp), %rdi
++	ret
++2:
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	movq	%rbx, -24(%rsp)
++	mov	%RSP_LP, %RBX_LP
++	cfi_def_cfa_register(%rbx)
++	and	$-STATE_SAVE_ALIGNMENT, %RSP_LP
++#endif
++#ifdef REGISTER_SAVE_AREA
++# if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	/* STATE_SAVE_OFFSET has space for 8 integer registers.  But we
++	   need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus
++	   RBX above.  */
++	sub	$(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP
++# else
++	sub	$REGISTER_SAVE_AREA, %RSP_LP
++	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
++# endif
++#else
++	/* Allocate stack space of the required size to save the state.  */
++	sub	_rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
++#endif
++	/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
++	   r10 and r11.  */
++	movq	%rcx, REGISTER_SAVE_RCX(%rsp)
++	movq	%rdx, REGISTER_SAVE_RDX(%rsp)
++	movq	%r8, REGISTER_SAVE_R8(%rsp)
++	movq	%r9, REGISTER_SAVE_R9(%rsp)
++	movq	%r10, REGISTER_SAVE_R10(%rsp)
++	movq	%r11, REGISTER_SAVE_R11(%rsp)
++#ifdef USE_FXSAVE
++	fxsave	STATE_SAVE_OFFSET(%rsp)
++#else
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	/* Clear the XSAVE Header.  */
++# ifdef USE_XSAVE
++	movq	%rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
++# endif
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
++	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
++# ifdef USE_XSAVE
++	xsave	STATE_SAVE_OFFSET(%rsp)
++# else
++	xsavec	STATE_SAVE_OFFSET(%rsp)
++# endif
++#endif
++	/* %rdi already points to the tlsinfo data structure.  */
++	call	HIDDEN_JUMPTARGET (__tls_get_addr)
++	# Get register content back.
++#ifdef USE_FXSAVE
++	fxrstor	STATE_SAVE_OFFSET(%rsp)
++#else
++	/* Save and restore __tls_get_addr return value stored in RAX.  */
++	mov	%RAX_LP, %RCX_LP
++	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
++	xorl	%edx, %edx
++	xrstor	STATE_SAVE_OFFSET(%rsp)
++	mov	%RCX_LP, %RAX_LP
++#endif
++	movq	REGISTER_SAVE_R11(%rsp), %r11
++	movq	REGISTER_SAVE_R10(%rsp), %r10
++	movq	REGISTER_SAVE_R9(%rsp), %r9
++	movq	REGISTER_SAVE_R8(%rsp), %r8
++	movq	REGISTER_SAVE_RDX(%rsp), %rdx
++	movq	REGISTER_SAVE_RCX(%rsp), %rcx
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++	mov	%RBX_LP, %RSP_LP
++	cfi_def_cfa_register(%rsp)
++	movq	-24(%rsp), %rbx
++	cfi_restore(%rbx)
++#else
++	add	$REGISTER_SAVE_AREA, %RSP_LP
++	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
++#endif
++	jmp	1b
++	cfi_endproc
++	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
++
++#undef STATE_SAVE_ALIGNMENT
+diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
+index f748af2ece..ea69f5223a 100644
+--- a/sysdeps/x86_64/dl-tlsdesc.S
++++ b/sysdeps/x86_64/dl-tlsdesc.S
+@@ -18,7 +18,19 @@
+ 
+ #include <sysdep.h>
+ #include <tls.h>
++#include <cpu-features-offsets.h>
++#include <features-offsets.h>
+ #include "tlsdesc.h"
++#include "dl-trampoline-save.h"
++
++/* Area on stack to save and restore registers used for parameter
++   passing when calling _dl_tlsdesc_dynamic.  */
++#define REGISTER_SAVE_RCX	0
++#define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
++#define REGISTER_SAVE_R8	(REGISTER_SAVE_RDX + 8)
++#define REGISTER_SAVE_R9	(REGISTER_SAVE_R8 + 8)
++#define REGISTER_SAVE_R10	(REGISTER_SAVE_R9 + 8)
++#define REGISTER_SAVE_R11	(REGISTER_SAVE_R10 + 8)
+ 
+ 	.text
+ 
+@@ -67,80 +79,24 @@ _dl_tlsdesc_undefweak:
+ 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+ 
+ #ifdef SHARED
+-	.hidden _dl_tlsdesc_dynamic
+-	.global	_dl_tlsdesc_dynamic
+-	.type	_dl_tlsdesc_dynamic,@function
+-
+-     /* %rax points to the TLS descriptor, such that 0(%rax) points to
+-	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
+-	tlsdesc_dynamic_arg object.  It must return in %rax the offset
+-	between the thread pointer and the object denoted by the
+-	argument, without clobbering any registers.
+-
+-	The assembly code that follows is a rendition of the following
+-	C code, hand-optimized a little bit.
+-
+-ptrdiff_t
+-_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
+-{
+-  struct tlsdesc_dynamic_arg *td = tdp->arg;
+-  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+-  if (__builtin_expect (td->gen_count <= dtv[0].counter
+-			&& (dtv[td->tlsinfo.ti_module].pointer.val
+-			    != TLS_DTV_UNALLOCATED),
+-			1))
+-    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+-      - __thread_pointer;
+-
+-  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
+-}
+-*/
+-	cfi_startproc
+-	.align 16
+-_dl_tlsdesc_dynamic:
+-	_CET_ENDBR
+-	/* Preserve call-clobbered registers that we modify.
+-	   We need two scratch regs anyway.  */
+-	movq	%rsi, -16(%rsp)
+-	mov	%fs:DTV_OFFSET, %RSI_LP
+-	movq	%rdi, -8(%rsp)
+-	movq	TLSDESC_ARG(%rax), %rdi
+-	movq	(%rsi), %rax
+-	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
+-	ja	.Lslow
+-	movq	TLSDESC_MODID(%rdi), %rax
+-	salq	$4, %rax
+-	movq	(%rax,%rsi), %rax
+-	cmpq	$-1, %rax
+-	je	.Lslow
+-	addq	TLSDESC_MODOFF(%rdi), %rax
+-.Lret:
+-	movq	-16(%rsp), %rsi
+-	sub	%fs:0, %RAX_LP
+-	movq	-8(%rsp), %rdi
+-	ret
+-.Lslow:
+-	/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
+-	   r10 and r11.  Also, align the stack, that's off by 8 bytes.	*/
+-	subq	$72, %rsp
+-	cfi_adjust_cfa_offset (72)
+-	movq	%rdx, 8(%rsp)
+-	movq	%rcx, 16(%rsp)
+-	movq	%r8, 24(%rsp)
+-	movq	%r9, 32(%rsp)
+-	movq	%r10, 40(%rsp)
+-	movq	%r11, 48(%rsp)
+-	/* %rdi already points to the tlsinfo data structure.  */
+-	call	HIDDEN_JUMPTARGET (__tls_get_addr)
+-	movq	8(%rsp), %rdx
+-	movq	16(%rsp), %rcx
+-	movq	24(%rsp), %r8
+-	movq	32(%rsp), %r9
+-	movq	40(%rsp), %r10
+-	movq	48(%rsp), %r11
+-	addq	$72, %rsp
+-	cfi_adjust_cfa_offset (-72)
+-	jmp	.Lret
+-	cfi_endproc
+-	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
++# define USE_FXSAVE
++# define STATE_SAVE_ALIGNMENT	16
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_FXSAVE
++
++# define USE_XSAVE
++# define STATE_SAVE_ALIGNMENT	64
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsave
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_XSAVE
++
++# define USE_XSAVEC
++# define STATE_SAVE_ALIGNMENT	64
++# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsavec
++# include "dl-tlsdesc-dynamic.h"
++# undef _dl_tlsdesc_dynamic
++# undef USE_XSAVEC
+ #endif /* SHARED */
+diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
+new file mode 100644
+index 0000000000..84eac4a8ac
+--- /dev/null
++++ b/sysdeps/x86_64/dl-trampoline-save.h
+@@ -0,0 +1,34 @@
++/* x86-64 PLT trampoline register save macros.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#ifndef DL_STACK_ALIGNMENT
++/* Due to GCC bug:
++
++   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
++
++   __tls_get_addr may be called with 8-byte stack alignment.  Although
++   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
++   that stack will be always aligned at 16 bytes.  */
++# define DL_STACK_ALIGNMENT 8
++#endif
++
++/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
++   stack to 16 bytes before calling _dl_fixup.  */
++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
++  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
++   || 16 > DL_STACK_ALIGNMENT)
+diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h
+new file mode 100644
+index 0000000000..575f120797
+--- /dev/null
++++ b/sysdeps/x86_64/dl-trampoline-state.h
+@@ -0,0 +1,51 @@
++/* x86-64 PLT dl-trampoline state macros.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#if (STATE_SAVE_ALIGNMENT % 16) != 0
++# error STATE_SAVE_ALIGNMENT must be multiple of 16
++#endif
++
++#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
++# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
++#endif
++
++#if DL_RUNTIME_RESOLVE_REALIGN_STACK
++/* Local stack area before jumping to function address: RBX.  */
++# define LOCAL_STORAGE_AREA	8
++# define BASE			rbx
++# ifdef USE_FXSAVE
++/* Use fxsave to save XMM registers.  */
++#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
++#  if (REGISTER_SAVE_AREA % 16) != 0
++#   error REGISTER_SAVE_AREA must be multiple of 16
++#  endif
++# endif
++#else
++# ifndef USE_FXSAVE
++#  error USE_FXSAVE must be defined
++# endif
++/* Use fxsave to save XMM registers.  */
++# define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
++/* Local stack area before jumping to function address:  All saved
++   registers.  */
++# define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
++# define BASE			rsp
++# if (REGISTER_SAVE_AREA % 16) != 8
++#  error REGISTER_SAVE_AREA must be odd multiple of 8
++# endif
++#endif
+diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
+index b2e7e0f69b..87c5137837 100644
+--- a/sysdeps/x86_64/dl-trampoline.S
++++ b/sysdeps/x86_64/dl-trampoline.S
+@@ -22,25 +22,7 @@
+ #include <features-offsets.h>
+ #include <link-defines.h>
+ #include <isa-level.h>
+-
+-#ifndef DL_STACK_ALIGNMENT
+-/* Due to GCC bug:
+-
+-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+-
+-   __tls_get_addr may be called with 8-byte stack alignment.  Although
+-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+-   that stack will be always aligned at 16 bytes.  We use unaligned
+-   16-byte move to load and store SSE registers, which has no penalty
+-   on modern processors if stack is 16-byte aligned.  */
+-# define DL_STACK_ALIGNMENT 8
+-#endif
+-
+-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+-   stack to 16 bytes before calling _dl_fixup.  */
+-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+-   || 16 > DL_STACK_ALIGNMENT)
++#include "dl-trampoline-save.h"
+ 
+ /* Area on stack to save and restore registers used for parameter
+    passing when calling _dl_fixup.  */
+diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
+index f55c6ea040..d9ccfb40d4 100644
+--- a/sysdeps/x86_64/dl-trampoline.h
++++ b/sysdeps/x86_64/dl-trampoline.h
+@@ -27,39 +27,7 @@
+ # undef LOCAL_STORAGE_AREA
+ # undef BASE
+ 
+-# if (STATE_SAVE_ALIGNMENT % 16) != 0
+-#  error STATE_SAVE_ALIGNMENT must be multiple of 16
+-# endif
+-
+-# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+-#  error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
+-# endif
+-
+-# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+-/* Local stack area before jumping to function address: RBX.  */
+-#  define LOCAL_STORAGE_AREA	8
+-#  define BASE			rbx
+-#  ifdef USE_FXSAVE
+-/* Use fxsave to save XMM registers.  */
+-#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
+-#   if (REGISTER_SAVE_AREA % 16) != 0
+-#    error REGISTER_SAVE_AREA must be multiple of 16
+-#   endif
+-#  endif
+-# else
+-#  ifndef USE_FXSAVE
+-#   error USE_FXSAVE must be defined
+-#  endif
+-/* Use fxsave to save XMM registers.  */
+-#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
+-/* Local stack area before jumping to function address:  All saved
+-   registers.  */
+-#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
+-#  define BASE			rsp
+-#  if (REGISTER_SAVE_AREA % 16) != 8
+-#   error REGISTER_SAVE_AREA must be odd multiple of 8
+-#  endif
+-# endif
++# include "dl-trampoline-state.h"
+ 
+ 	.globl _dl_runtime_resolve
+ 	.hidden _dl_runtime_resolve
+
+commit 853e915fdd6ae6c5f1a7a68d2594ec8dbfef1286
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Wed Feb 28 12:08:03 2024 -0800
+
+    x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
+    
+    _dl_tlsdesc_dynamic should also preserve AMX registers which are
+    caller-saved.  Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
+    to x86-64 TLSDESC_CALL_STATE_SAVE_MASK.  Compute the AMX state size
+    and save it in xsave_state_full_size which is only used by
+    _dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec.  This fixes
+    the AMX part of BZ #31372.  Tested on AMX processor.
+    
+    AMX test is enabled only for compilers with the fix for
+    
+    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
+    
+    GCC 14 and GCC 11/12/13 branches have the bug fix.
+    Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
+    
+    (cherry picked from commit 9b7091415af47082664717210ac49d51551456ab)
+
+diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
+index 4223feb95f..9a1e7aa646 100644
+--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
++++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
+@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
+ $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
+ 	cp $< $@
+ endif
++
++ifeq (yes,$(have-mamx-tile))
++tests += \
++  tst-gnu2-tls2-amx \
++# tests
++
++modules-names += \
++  tst-gnu2-tls2-amx-mod0 \
++  tst-gnu2-tls2-amx-mod1 \
++  tst-gnu2-tls2-amx-mod2 \
++# modules-names
++
++$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
++$(objpfx)tst-gnu2-tls2-amx.out: \
++  $(objpfx)tst-gnu2-tls2-amx-mod0.so \
++  $(objpfx)tst-gnu2-tls2-amx-mod1.so \
++  $(objpfx)tst-gnu2-tls2-amx-mod2.so
++$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
++$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
++$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
++
++CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
++CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
++CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
++CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
++endif
++
+ endif # $(subdir) == elf
+ 
+ ifneq ($(enable-cet),no)
+diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
+index 2f511321ad..ef4631bf4b 100644
+--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
++++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
+@@ -20,3 +20,8 @@
+ # define ARCH_SHSTK_SHSTK		0x1
+ # define ARCH_SHSTK_WRSS		0x2
+ #endif
++
++#ifndef ARCH_GET_XCOMP_PERM
++# define ARCH_GET_XCOMP_PERM		0x1022
++# define ARCH_REQ_XCOMP_PERM		0x1023
++#endif
+diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
+new file mode 100644
+index 0000000000..2e0c7b91b7
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
+@@ -0,0 +1,2 @@
++#include "tst-gnu2-tls2-amx.h"
++#include <tst-gnu2-tls2mod0.c>
+diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
+new file mode 100644
+index 0000000000..b8a8ccf1c1
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
+@@ -0,0 +1,2 @@
++#include "tst-gnu2-tls2-amx.h"
++#include <tst-gnu2-tls2mod1.c>
+diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
+new file mode 100644
+index 0000000000..cdf4a8f363
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
+@@ -0,0 +1,2 @@
++#include "tst-gnu2-tls2-amx.h"
++#include <tst-gnu2-tls2mod2.c>
+diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
+new file mode 100644
+index 0000000000..ae4dd82556
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
+@@ -0,0 +1,83 @@
++/* Test TLSDESC relocation with AMX.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include <stdbool.h>
++#include <asm/prctl.h>
++#include <support/check.h>
++#include "tst-gnu2-tls2-amx.h"
++
++extern int arch_prctl (int, ...);
++
++#define X86_XSTATE_TILECFG_ID	17
++#define X86_XSTATE_TILEDATA_ID	18
++
++/* Initialize tile config.  */
++__attribute__ ((noinline, noclone))
++static void
++init_tile_config (__tilecfg *tileinfo)
++{
++  int i;
++  tileinfo->palette_id = 1;
++  tileinfo->start_row = 0;
++
++  tileinfo->colsb[0] = MAX_ROWS;
++  tileinfo->rows[0] = MAX_ROWS;
++
++  for (i = 1; i < 4; ++i)
++  {
++    tileinfo->colsb[i] = MAX_COLS;
++    tileinfo->rows[i] = MAX_ROWS;
++  }
++
++  _tile_loadconfig (tileinfo);
++}
++
++static bool
++enable_amx (void)
++{
++  uint64_t bitmask;
++  if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
++    return false;
++
++  if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
++    return false;
++
++  if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
++    return false;
++
++  /* Load tile configuration.  */
++  __tilecfg tile_data = { 0 };
++  init_tile_config (&tile_data);
++
++  return true;
++}
++
++/* An architecture can define it to clobber caller-saved registers in
++   malloc below to verify that the implicit TLSDESC call won't change
++   caller-saved registers.  */
++static void
++clear_tile_register (void)
++{
++  _tile_zero (2);
++}
++
++#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
++#define IS_SUPPORTED()	enable_amx ()
++#define PREPARE_MALLOC() clear_tile_register ()
++
++#include <elf/tst-gnu2-tls2.c>
+diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
+new file mode 100644
+index 0000000000..1845a3caba
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
+@@ -0,0 +1,63 @@
++/* Test TLSDESC relocation with AMX.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include <stdint.h>
++#include <string.h>
++#include <x86intrin.h>
++#include <support/check.h>
++
++#define MAX_ROWS 16
++#define MAX_COLS 64
++#define MAX 1024
++#define STRIDE 64
++
++typedef struct __tile_config
++{
++  uint8_t palette_id;
++  uint8_t start_row;
++  uint8_t reserved_0[14];
++  uint16_t colsb[16];
++  uint8_t rows[16];
++} __tilecfg __attribute__ ((aligned (64)));
++
++/* Initialize int8_t buffer */
++static inline void
++init_buffer (int8_t *buf, int8_t value)
++{
++  int rows, colsb, i, j;
++  rows  = MAX_ROWS;
++  colsb = MAX_COLS;
++
++  for (i = 0; i < rows; i++)
++    for (j = 0; j < colsb; j++)
++      buf[i * colsb + j] = value;
++}
++
++#define BEFORE_TLSDESC_CALL()					\
++  int8_t src[MAX];						\
++  int8_t res[MAX];						\
++  /* Initialize src with data  */				\
++  init_buffer (src, 2);						\
++  /* Load tile rows from memory.  */				\
++  _tile_loadd (2, src, STRIDE);
++
++#define AFTER_TLSDESC_CALL()					\
++  /* Store the tile data to memory.  */				\
++  _tile_stored (2, res, STRIDE);				\
++  _tile_release ();						\
++  TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
+diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
+index 6a8fd29813..21fc88d651 100644
+--- a/sysdeps/x86/cpu-features-offsets.sym
++++ b/sysdeps/x86/cpu-features-offsets.sym
+@@ -3,3 +3,4 @@
+ #include <ldsodefs.h>
+ 
+ XSAVE_STATE_SIZE_OFFSET	offsetof (struct cpu_features, xsave_state_size)
++XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index 835113b42f..d71e8d3d2e 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
+ 	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
+ 	  if (ebx != 0)
+ 	    {
++	      /* NB: On AMX capable processors, ebx always includes AMX
++		 states.  */
+ 	      unsigned int xsave_state_full_size
+ 		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+ 
+@@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
+ 		{
+ 		  unsigned int xstate_comp_offsets[32];
+ 		  unsigned int xstate_comp_sizes[32];
++#ifdef __x86_64__
++		  unsigned int xstate_amx_comp_offsets[32];
++		  unsigned int xstate_amx_comp_sizes[32];
++		  unsigned int amx_ecx;
++#endif
+ 		  unsigned int i;
+ 
+ 		  xstate_comp_offsets[0] = 0;
+@@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
+ 		  xstate_comp_offsets[2] = 576;
+ 		  xstate_comp_sizes[0] = 160;
+ 		  xstate_comp_sizes[1] = 256;
++#ifdef __x86_64__
++		  xstate_amx_comp_offsets[0] = 0;
++		  xstate_amx_comp_offsets[1] = 160;
++		  xstate_amx_comp_offsets[2] = 576;
++		  xstate_amx_comp_sizes[0] = 160;
++		  xstate_amx_comp_sizes[1] = 256;
++#endif
+ 
+ 		  for (i = 2; i < 32; i++)
+ 		    {
+-		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
++		      if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
+ 			{
+ 			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+-			  xstate_comp_sizes[i] = eax;
++#ifdef __x86_64__
++			  /* Include this in xsave_state_full_size.  */
++			  amx_ecx = ecx;
++			  xstate_amx_comp_sizes[i] = eax;
++			  if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
++			    {
++			      /* Exclude this from xsave_state_size.  */
++			      ecx = 0;
++			      xstate_comp_sizes[i] = 0;
++			    }
++			  else
++#endif
++			    xstate_comp_sizes[i] = eax;
+ 			}
+ 		      else
+ 			{
++#ifdef __x86_64__
++			  amx_ecx = 0;
++			  xstate_amx_comp_sizes[i] = 0;
++#endif
+ 			  ecx = 0;
+ 			  xstate_comp_sizes[i] = 0;
+ 			}
+@@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
+ 			  if ((ecx & (1 << 1)) != 0)
+ 			    xstate_comp_offsets[i]
+ 			      = ALIGN_UP (xstate_comp_offsets[i], 64);
++#ifdef __x86_64__
++			  xstate_amx_comp_offsets[i]
++			    = (xstate_amx_comp_offsets[i - 1]
++			       + xstate_amx_comp_sizes[i - 1]);
++			  if ((amx_ecx & (1 << 1)) != 0)
++			    xstate_amx_comp_offsets[i]
++			      = ALIGN_UP (xstate_amx_comp_offsets[i],
++					  64);
++#endif
+ 			}
+ 		    }
+ 
+@@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
+ 		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+ 		  if (size)
+ 		    {
++#ifdef __x86_64__
++		      unsigned int amx_size
++			= (xstate_amx_comp_offsets[31]
++			   + xstate_amx_comp_sizes[31]);
++		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
++					   64);
++		      /* Set xsave_state_full_size to the compact AMX
++			 state size for XSAVEC.  NB: xsave_state_full_size
++			 is only used in _dl_tlsdesc_dynamic_xsave and
++			 _dl_tlsdesc_dynamic_xsavec.  */
++		      cpu_features->xsave_state_full_size = amx_size;
++#endif
+ 		      cpu_features->xsave_state_size
+ 			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+ 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
+diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
+index b9bf3115b6..cd7bd27cf3 100644
+--- a/sysdeps/x86/include/cpu-features.h
++++ b/sysdeps/x86/include/cpu-features.h
+@@ -934,6 +934,8 @@ struct cpu_features
+   /* The full state size for XSAVE when XSAVEC is disabled by
+ 
+      GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
++
++     and the AMX state size when XSAVEC is available.
+    */
+   unsigned int xsave_state_full_size;
+   /* Data cache size for use in memory and string routines, typically
+diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
+index 485cad9c02..db8e576e91 100644
+--- a/sysdeps/x86/sysdep.h
++++ b/sysdeps/x86/sysdep.h
+@@ -56,6 +56,14 @@
+    | (1 << X86_XSTATE_ZMM_H_ID) 	\
+    | (1 << X86_XSTATE_ZMM_ID)		\
+    | (1 << X86_XSTATE_APX_F_ID))
++
++/* AMX state mask.  */
++# define AMX_STATE_SAVE_MASK		\
++  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
++
++/* States to be included in xsave_state_full_size.  */
++# define FULL_STATE_SAVE_MASK		\
++  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
+ #else
+ /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
+    doesn't have red-zone, use 0 here.  */
+@@ -68,13 +76,17 @@
+    | (1 << X86_XSTATE_BNDREGS_ID)	\
+    | (1 << X86_XSTATE_K_ID)		\
+    | (1 << X86_XSTATE_ZMM_H_ID))
++
++/* States to be included in xsave_state_size.  */
++# define FULL_STATE_SAVE_MASK		STATE_SAVE_MASK
+ #endif
+ 
+ /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
+-   Compiler assumes that all registers, including x87 FPU stack registers,
+-   are unchanged after CALL, except for EFLAGS and RAX/EAX.  */
++   Compiler assumes that all registers, including AMX and x87 FPU
++   stack registers, are unchanged after CALL, except for EFLAGS and
++   RAX/EAX.  */
+ #define TLSDESC_CALL_STATE_SAVE_MASK	\
+-  (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
++  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+ 
+ /* Constants for bits in __x86_string_control:  */
+ 
+diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
+index 418cc4a9b8..04a534fa12 100755
+--- a/sysdeps/x86_64/configure
++++ b/sysdeps/x86_64/configure
+@@ -134,6 +134,34 @@ fi
+ config_vars="$config_vars
+ enable-cet = $enable_cet"
+ 
++# Check if -mamx-tile works properly.
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
++printf %s "checking whether -mamx-tile works properly... " >&6; }
++if test ${libc_cv_x86_have_amx_tile+y}
++then :
++  printf %s "(cached) " >&6
++else $as_nop
++  cat > conftest.c <<EOF
++#include <x86intrin.h>
++EOF
++	       libc_cv_x86_have_amx_tile=no
++	       if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }; then
++		 if grep -q __builtin_ia32_ldtilecfg conftest.i; then
++		   libc_cv_x86_have_amx_tile=yes
++	         fi
++	       fi
++	       rm -rf conftest*
++fi
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
++printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
++config_vars="$config_vars
++have-mamx-tile = $libc_cv_x86_have_amx_tile"
++
+ test -n "$critic_missing" && as_fn_error $? "
+ *** $critic_missing" "$LINENO" 5
+ 
+diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
+index d1f803c02e..c714c47351 100644
+--- a/sysdeps/x86_64/configure.ac
++++ b/sysdeps/x86_64/configure.ac
+@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
+ fi
+ LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
+ 
++# Check if -mamx-tile works properly.
++AC_CACHE_CHECK(whether -mamx-tile works properly,
++	       libc_cv_x86_have_amx_tile, [dnl
++cat > conftest.c <<EOF
++#include <x86intrin.h>
++EOF
++	       libc_cv_x86_have_amx_tile=no
++	       if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
++		 if grep -q __builtin_ia32_ldtilecfg conftest.i; then
++		   libc_cv_x86_have_amx_tile=yes
++	         fi
++	       fi
++	       rm -rf conftest*])
++LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
++
+ test -n "$critic_missing" && AC_MSG_ERROR([
+ *** $critic_missing])
+diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
+index 0c2e8d5320..9f02cfc3eb 100644
+--- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
++++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
+@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
+ # endif
+ #else
+ 	/* Allocate stack space of the required size to save the state.  */
+-	sub	_rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
++	sub	_rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
+ #endif
+ 	/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
+ 	   r10 and r11.  */
+
+commit 354cabcb2634abe16da7a2ba5e648aac1204b58e
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Mon Mar 18 06:40:16 2024 -0700
+
+    x86-64: Allocate state buffer space for RDI, RSI and RBX
+    
+    _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
+    After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
+    TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
+    to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
+    STATE_SAVE_OFFSET(%rsp).
+    
+       +==================+<- stack frame start aligned at 8 or 16 bytes
+       |                  |<- RDI saved in the red zone
+       |                  |<- RSI saved in the red zone
+       |                  |<- RBX saved in the red zone
+       |                  |<- paddings for stack realignment of 64 bytes
+       |------------------|<- xsave buffer end aligned at 64 bytes
+       |                  |<-
+       |                  |<-
+       |                  |<-
+       |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+       |                  |<- 8-byte padding for 64-byte alignment
+       |                  |<- 8-byte padding for 64-byte alignment
+       |                  |<- R11
+       |                  |<- R10
+       |                  |<- R9
+       |                  |<- R8
+       |                  |<- RDX
+       |                  |<- RCX
+       +==================+<- RSP aligned at 64 bytes
+    
+    Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
+    for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
+    and RBX are saved onto stack without adjusting stack pointer first, using
+    the red-zone.  This fixes BZ #31501.
+    Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
+    
+    (cherry picked from commit 717ebfa85c8240d32d0d19d86a484c31c55c9617)
+
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index d71e8d3d2e..6fe1b728c6 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -310,7 +310,7 @@ update_active (struct cpu_features *cpu_features)
+ 	      /* NB: On AMX capable processors, ebx always includes AMX
+ 		 states.  */
+ 	      unsigned int xsave_state_full_size
+-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
++		= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
+ 
+ 	      cpu_features->xsave_state_size
+ 		= xsave_state_full_size;
+@@ -400,8 +400,10 @@ update_active (struct cpu_features *cpu_features)
+ 		      unsigned int amx_size
+ 			= (xstate_amx_comp_offsets[31]
+ 			   + xstate_amx_comp_sizes[31]);
+-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
+-					   64);
++		      amx_size
++			= ALIGN_UP ((amx_size
++				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
++				    64);
+ 		      /* Set xsave_state_full_size to the compact AMX
+ 			 state size for XSAVEC.  NB: xsave_state_full_size
+ 			 is only used in _dl_tlsdesc_dynamic_xsave and
+@@ -409,7 +411,8 @@ update_active (struct cpu_features *cpu_features)
+ 		      cpu_features->xsave_state_full_size = amx_size;
+ #endif
+ 		      cpu_features->xsave_state_size
+-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
++			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
++				    64);
+ 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
+ 		    }
+ 		}
+diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
+index db8e576e91..7359149e17 100644
+--- a/sysdeps/x86/sysdep.h
++++ b/sysdeps/x86/sysdep.h
+@@ -38,14 +38,59 @@
+ #ifdef __x86_64__
+ /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
+    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
+-   aligned to 16 bytes for fxsave and 64 bytes for xsave.
+-
+-   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
+-   are saved on stack without adjusting stack pointer first.  When we
+-   update stack pointer to allocate more space, we need to take the
+-   red-zone into account.  */
++   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
++   because MOV, instead of PUSH, is used to save registers onto stack.
++
++   +==================+<- stack frame start aligned at 8 or 16 bytes
++   |                  |<- paddings for stack realignment of 64 bytes
++   |------------------|<- xsave buffer end aligned at 64 bytes
++   |                  |<-
++   |                  |<-
++   |                  |<-
++   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
++   |                  |<- 8-byte padding for 64-byte alignment
++   |                  |<- R9
++   |                  |<- R8
++   |                  |<- RDI
++   |                  |<- RSI
++   |                  |<- RDX
++   |                  |<- RCX
++   |                  |<- RAX
++   +==================+<- RSP aligned at 64 bytes
++
++ */
+ # define STATE_SAVE_OFFSET (8 * 7 + 8)
+ 
++/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
++   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
++   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
++   RDI, RSI and RBX values on stack by xsave.
++
++   +==================+<- stack frame start aligned at 8 or 16 bytes
++   |                  |<- RDI saved in the red zone
++   |                  |<- RSI saved in the red zone
++   |                  |<- RBX saved in the red zone
++   |                  |<- paddings for stack realignment of 64 bytes
++   |------------------|<- xsave buffer end aligned at 64 bytes
++   |                  |<-
++   |                  |<-
++   |                  |<-
++   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
++   |                  |<- 8-byte padding for 64-byte alignment
++   |                  |<- 8-byte padding for 64-byte alignment
++   |                  |<- R11
++   |                  |<- R10
++   |                  |<- R9
++   |                  |<- R8
++   |                  |<- RDX
++   |                  |<- RCX
++   +==================+<- RSP aligned at 64 bytes
++
++   Define the total register save area size for all integer registers by
++   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
++   stack without adjusting stack pointer first, using the red-zone.  */
++# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
++
+ /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
+    registers are mutually exclusive.  */
+ # define STATE_SAVE_MASK		\
+@@ -66,8 +111,9 @@
+   (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
+ #else
+ /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
+-   doesn't have red-zone, use 0 here.  */
++   uses PUSH to save registers onto stack, use 0 here.  */
+ # define STATE_SAVE_OFFSET 0
++# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
+ 
+ /* Save SSE, AVX, AXV512, mask and bound registers.   */
+ # define STATE_SAVE_MASK		\
+diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
+new file mode 100644
+index 0000000000..1d636669ba
+--- /dev/null
++++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
+@@ -0,0 +1,87 @@
++/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++
++/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
++   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
++   is 960 bytes and this test didn't fail.  It may be due to the unused
++   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
++   this test might fail without the fix.  */
++#ifndef OFFSET
++# define OFFSET 40
++#endif
++
++	.text
++	.p2align 4
++	.globl	apply_tls
++	.type	apply_tls, @function
++apply_tls:
++	cfi_startproc
++	_CET_ENDBR
++	pushq	%rbp
++	cfi_def_cfa_offset (16)
++	cfi_offset (6, -16)
++	movdqu	(%RDI_LP), %xmm0
++	lea	tls_var1@TLSDESC(%rip), %RAX_LP
++	mov	%RSP_LP, %RBP_LP
++	cfi_def_cfa_register (6)
++	/* Align stack to 64 bytes.  */
++	and	$-64, %RSP_LP
++	sub	$OFFSET, %RSP_LP
++	pushq	%rbx
++	/* Set %ebx and %esi to 0xbadbeef.  */
++	movl	$0xbadbeef, %ebx
++	movl	$0xbadbeef, %esi
++	movq	%rdi, saved_rdi(%rip)
++	movq	%rsi, saved_rsi(%rip)
++	call	*tls_var1@TLSCALL(%RAX_LP)
++	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
++	cmpq	saved_rdi(%rip), %rdi
++	jne	L(hlt)
++	cmpq	saved_rsi(%rip), %rsi
++	jne	L(hlt)
++	cmpl	$0xbadbeef, %ebx
++	jne	L(hlt)
++	add	%fs:0, %RAX_LP
++	movups	%xmm0, 32(%RAX_LP)
++	movdqu	16(%RDI_LP), %xmm1
++	mov	%RAX_LP, %RBX_LP
++	movups	%xmm1, 48(%RAX_LP)
++	lea	32(%RBX_LP), %RAX_LP
++	pop	%rbx
++	leave
++	cfi_def_cfa (7, 8)
++	ret
++L(hlt):
++	hlt
++	cfi_endproc
++	.size	apply_tls, .-apply_tls
++	.hidden	tls_var1
++	.globl	tls_var1
++	.section	.tbss,"awT",@nobits
++	.align 16
++	.type	tls_var1, @object
++	.size	tls_var1, 3200
++tls_var1:
++	.zero	3200
++	.local	saved_rdi
++	.comm	saved_rdi,8,8
++	.local	saved_rsi
++	.comm	saved_rsi,8,8
++	.section	.note.GNU-stack,"",@progbits
+
+commit 15aebdbada54098787715448c94701f17033fc92
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Tue Mar 12 13:21:18 2024 -0300
+
+    Ignore undefined symbols for -mtls-dialect=gnu2
+    
+    So it does not fail for the arm config that defaults to -mtp=soft
+    (which issues a call to __aeabi_read_tp).
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 968b0ca9440040a2b31248a572891f0e55c1ab10)
+
+diff --git a/configure b/configure
+index 59ff1e415d..117b48a421 100755
+--- a/configure
++++ b/configure
+@@ -7020,7 +7020,7 @@ void foo (void)
+ }
+ EOF
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
+-		   conftest.c -o conftest 1>&5'
++		   -shared conftest.c -o conftest 1>&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/configure.ac b/configure.ac
+index 65799e5685..19b88a47a5 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1297,7 +1297,7 @@ void foo (void)
+ }
+ EOF
+ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
+-		   conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
++		   -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
+ then
+   libc_cv_mtls_dialect_gnu2=yes
+ else
+
+commit a8ba52bde58c69f2b31da62ad2311f119adf6cb9
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Tue Mar 12 13:21:19 2024 -0300
+
+    arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
+    
+    ARM _dl_tlsdesc_dynamic slow path has two issues:
+    
+      * The ip/r12 register is defined by the AAPCS as a scratch register,
+        and gcc uses it to save the stack pointer before some function
+        calls.  So it should be saved/restored as well.  This fixes
+        tst-gnu2-tls2.
+    
+      * None of the possible VFP registers are saved/restored.  ARM has the
+        additional complexity of having different VFP bank sizes (depending
+        on the VFP support of the chip).
+    
+    The tst-gnu2-tls2 test is extended to check the VFP registers, although
+    only for hardfp builds.  Unlike setcontext, _dl_tlsdesc_dynamic does
+    not handle HWCAP_ARM_IWMMXT (I don't have a way to properly test it,
+    and it has been almost a decade since newer hardware was released).
+    
+    With this patch there is no need to mark tst-gnu2-tls2 as XFAIL.
+    
+    Checked on arm-linux-gnueabihf.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 64c7e344289ed085517c2227d8e3b06388242c13)
+
+diff --git a/config.h.in b/config.h.in
+index 44a34072a4..4d33c63a84 100644
+--- a/config.h.in
++++ b/config.h.in
+@@ -141,6 +141,9 @@
+ /* LOONGARCH floating-point ABI for ld.so.  */
+ #undef LOONGARCH_ABI_FRLEN
+ 
++/* Define whether ARM used hard-float and support VFPvX-D32.  */
++#undef HAVE_ARM_PCS_VFP_D32
++
+ /* Linux specific: minimum supported kernel version.  */
+ #undef	__LINUX_KERNEL_VERSION
+ 
+diff --git a/elf/Makefile b/elf/Makefile
+index c5c37a9147..030db4d207 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -3056,10 +3056,6 @@ $(objpfx)tst-gnu2-tls2.out: \
+   $(objpfx)tst-gnu2-tls2mod2.so
+ 
+ ifeq (yes,$(have-mtls-dialect-gnu2))
+-# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
+-# registers.  See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
+-test-xfail-tst-gnu2-tls2 = yes
+-
+ CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
+ CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
+ CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
+diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
+index 77964a57a3..1ade8151e2 100644
+--- a/elf/tst-gnu2-tls2.h
++++ b/elf/tst-gnu2-tls2.h
+@@ -27,6 +27,10 @@ extern struct tls *apply_tls (struct tls *);
+ 
+ /* An architecture can define them to verify that clobber caller-saved
+    registers aren't changed by the implicit TLSDESC call.  */
++#ifndef INIT_TLSDESC_CALL
++# define INIT_TLSDESC_CALL()
++#endif
++
+ #ifndef BEFORE_TLSDESC_CALL
+ # define BEFORE_TLSDESC_CALL()
+ #endif
+diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
+index 45556a0e17..3fe3c14277 100644
+--- a/elf/tst-gnu2-tls2mod0.c
++++ b/elf/tst-gnu2-tls2mod0.c
+@@ -16,13 +16,14 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include "tst-gnu2-tls2.h"
++#include <tst-gnu2-tls2.h>
+ 
+ __thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
+ 
+ struct tls *
+ apply_tls (struct tls *p)
+ {
++  INIT_TLSDESC_CALL ();
+   BEFORE_TLSDESC_CALL ();
+   tls_var0 = *p;
+   struct tls *ret = &tls_var0;
+diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
+index e10b9dbc0a..e210538468 100644
+--- a/elf/tst-gnu2-tls2mod1.c
++++ b/elf/tst-gnu2-tls2mod1.c
+@@ -16,13 +16,14 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include "tst-gnu2-tls2.h"
++#include <tst-gnu2-tls2.h>
+ 
+ __thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
+ 
+ struct tls *
+ apply_tls (struct tls *p)
+ {
++  INIT_TLSDESC_CALL ();
+   BEFORE_TLSDESC_CALL ();
+   tls_var1[1] = *p;
+   struct tls *ret = &tls_var1[1];
+diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
+index 141af51e55..6d3031dc5f 100644
+--- a/elf/tst-gnu2-tls2mod2.c
++++ b/elf/tst-gnu2-tls2mod2.c
+@@ -16,13 +16,14 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include "tst-gnu2-tls2.h"
++#include <tst-gnu2-tls2.h>
+ 
+ __thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
+ 
+ struct tls *
+ apply_tls (struct tls *p)
+ {
++  INIT_TLSDESC_CALL ();
+   BEFORE_TLSDESC_CALL ();
+   tls_var2 = *p;
+   struct tls *ret = &tls_var2;
+diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
+index 35e2918922..4ef4d46cbd 100644
+--- a/sysdeps/arm/configure
++++ b/sysdeps/arm/configure
+@@ -187,6 +187,38 @@ else
+ default-abi = soft"
+ fi
+ 
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5
++printf %s "checking whether VFP supports 32 registers... " >&6; }
++if test ${libc_cv_arm_pcs_vfp_d32+y}
++then :
++  printf %s "(cached) " >&6
++else $as_nop
++
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h.  */
++
++void foo (void)
++{
++  asm volatile ("vldr d16,=17" : : : "d16");
++}
++
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"
++then :
++  libc_cv_arm_pcs_vfp_d32=yes
++else $as_nop
++  libc_cv_arm_pcs_vfp_d32=no
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
++fi
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5
++printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; }
++if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
++then
++  printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h
++
++fi
++
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
+ printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
+ if test ${libc_cv_arm_pcrel_movw+y}
+diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
+index 5172e30bbe..cd00ddc9d9 100644
+--- a/sysdeps/arm/configure.ac
++++ b/sysdeps/arm/configure.ac
+@@ -21,6 +21,21 @@ else
+   LIBC_CONFIG_VAR([default-abi], [soft])
+ fi
+ 
++AC_CACHE_CHECK([whether VFP supports 32 registers],
++		libc_cv_arm_pcs_vfp_d32, [
++AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
++void foo (void)
++{
++  asm volatile ("vldr d16,=17" : : : "d16");
++}
++]])],
++                [libc_cv_arm_pcs_vfp_d32=yes],
++                [libc_cv_arm_pcs_vfp_d32=no])])
++if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
++then
++  AC_DEFINE(HAVE_ARM_PCS_VFP_D32)
++fi
++
+ AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
+ 	       libc_cv_arm_pcrel_movw, [
+ cat > conftest.s <<\EOF
+diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
+index 764c56e70f..ada106521d 100644
+--- a/sysdeps/arm/dl-tlsdesc.S
++++ b/sysdeps/arm/dl-tlsdesc.S
+@@ -19,6 +19,7 @@
+ #include <sysdep.h>
+ #include <arm-features.h>
+ #include <tls.h>
++#include <rtld-global-offsets.h>
+ #include "tlsdesc.h"
+ 
+ 	.text
+@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp)
+ 	.align 2
+ _dl_tlsdesc_dynamic:
+ 	/* Our calling convention is to clobber r0, r1 and the processor
+-	   flags.  All others that are modified must be saved */
+-	eabi_save ({r2,r3,r4,lr})
+-	push	{r2,r3,r4,lr}
+-	cfi_adjust_cfa_offset (16)
++	   flags.  All others that are modified must be saved.  r5 is
++	   used as the hwcap value to avoid reload after __tls_get_addr
++	   call.  If required we will save the vector register on the slow
++	   path.  */
++	eabi_save ({r2,r3,r4,r5,ip,lr})
++	push	{r2,r3,r4,r5,ip,lr}
++	cfi_adjust_cfa_offset (24)
+ 	cfi_rel_offset (r2,0)
+ 	cfi_rel_offset (r3,4)
+ 	cfi_rel_offset (r4,8)
+-	cfi_rel_offset (lr,12)
++	cfi_rel_offset (r5,12)
++	cfi_rel_offset (ip,16)
++	cfi_rel_offset (lr,20)
++
+ 	ldr	r1, [r0] /* td */
+ 	GET_TLS (lr)
+ 	mov	r4, r0 /* r4 = tp */
+@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic:
+ 	rsbne	r0, r4, r3
+ 	bne	2f
+ 1:	mov	r0, r1
++
++	/* Load the hwcap to check for vector support.  */
++	ldr     r2, 3f
++	ldr     r1, .Lrtld_global_ro
++0:	add     r2, pc, r2
++	ldr     r2, [r2, r1]
++	ldr     r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
++
++#ifdef __SOFTFP__
++	tst     r5, #HWCAP_ARM_VFP
++	beq     .Lno_vfp
++#endif
++
++	/* Store the VFP registers.  Don't use VFP instructions directly
++	   because this code is used in non-VFP multilibs.  */
++#define VFP_STACK_REQ (32*8 + 8)
++	sub	sp, sp, VFP_STACK_REQ
++	cfi_adjust_cfa_offset (VFP_STACK_REQ)
++	mov	r3, sp
++	.inst	0xeca30b20	/* vstmia r3!, {d0-d15} */
++	tst	r5, #HWCAP_ARM_VFPD32
++	beq	4f
++	.inst	0xece30b20	/* vstmia r3!, {d16-d31}  */
++	/* Store the floating-point status register.  */
++4:	.inst	0xeef12a10	/* vmrs	r2, fpscr */
++	str	r2, [r3]
++.Lno_vfp:
+ 	bl	__tls_get_addr
+ 	rsb	r0, r4, r0
++#ifdef __SOFTFP__
++	tst     r5, #HWCAP_ARM_VFP
++	beq     2f
++#endif
++	mov	r3, sp
++	.inst	0xecb30b20	/* vldmia r3!, {d0-d15}  */
++	tst	r5, #HWCAP_ARM_VFPD32
++	beq	5f
++	.inst	0xecf30b20	/* vldmia r3!, {d16-d31}  */
++	ldr	r4, [r3]
++5:	.inst	0xeee14a10	/* vmsr	fpscr, r4  */
++	add	sp, sp, VFP_STACK_REQ
++	cfi_adjust_cfa_offset (-VFP_STACK_REQ)
++
+ 2:
+ #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+      || defined (ARM_ALWAYS_BX))
+-	pop	{r2,r3,r4, lr}
+-	cfi_adjust_cfa_offset (-16)
++	pop	{r2,r3,r4,r5,ip, lr}
++	cfi_adjust_cfa_offset (-20)
+ 	cfi_restore (lr)
++	cfi_restore (ip)
++	cfi_restore (r5)
+ 	cfi_restore (r4)
+ 	cfi_restore (r3)
+ 	cfi_restore (r2)
+ 	bx	lr
+ #else
+-	pop	{r2,r3,r4, pc}
++	pop	{r2,r3,r4,r5,ip, pc}
+ #endif
+ 	eabi_fnend
+ 	cfi_endproc
+ 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
++
++3:      .long   _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
++.Lrtld_global_ro:
++	.long   C_SYMBOL_NAME(_rtld_global_ro)(GOT)
+ #endif /* SHARED */
+diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h
+new file mode 100644
+index 0000000000..e413ac21fb
+--- /dev/null
++++ b/sysdeps/arm/tst-gnu2-tls2.h
+@@ -0,0 +1,128 @@
++/* Test TLSDESC relocation.  ARM version.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <config.h>
++#include <sys/auxv.h>
++#include <string.h>
++#include <stdlib.h>
++#include <endian.h>
++
++#ifndef __SOFTFP__
++
++# ifdef HAVE_ARM_PCS_VFP_D32
++#  define SAVE_VFP_D32					\
++      asm volatile ("vldr d16,=17" : : : "d16");	\
++      asm volatile ("vldr d17,=18" : : : "d17");	\
++      asm volatile ("vldr d18,=19" : : : "d18");	\
++      asm volatile ("vldr d19,=20" : : : "d19");	\
++      asm volatile ("vldr d20,=21" : : : "d20");	\
++      asm volatile ("vldr d21,=22" : : : "d21");	\
++      asm volatile ("vldr d22,=23" : : : "d22");	\
++      asm volatile ("vldr d23,=24" : : : "d23");	\
++      asm volatile ("vldr d24,=25" : : : "d24");	\
++      asm volatile ("vldr d25,=26" : : : "d25");	\
++      asm volatile ("vldr d26,=27" : : : "d26");	\
++      asm volatile ("vldr d27,=28" : : : "d27");	\
++      asm volatile ("vldr d28,=29" : : : "d28");	\
++      asm volatile ("vldr d29,=30" : : : "d29");	\
++      asm volatile ("vldr d30,=31" : : : "d30");	\
++      asm volatile ("vldr d31,=32" : : : "d31");
++# else
++#  define SAVE_VFP_D32
++# endif
++
++# define INIT_TLSDESC_CALL()				\
++  unsigned long hwcap = getauxval (AT_HWCAP)
++
++/* Set each vector register to a value from 1 to 32 before the TLS access,
++   dump to memory after TLS access, and compare with the expected values.  */
++
++# define BEFORE_TLSDESC_CALL()				\
++  if (hwcap & HWCAP_ARM_VFP)				\
++    {							\
++      asm volatile ("vldr  d0,=1" : : : "d0");		\
++      asm volatile ("vldr  d1,=2" : : : "d1");		\
++      asm volatile ("vldr  d2,=3" : : : "d2");		\
++      asm volatile ("vldr  d3,=4" : : : "d3");		\
++      asm volatile ("vldr  d4,=5" : : : "d4");		\
++      asm volatile ("vldr  d5,=6" : : : "d5");		\
++      asm volatile ("vldr  d6,=7" : : : "d6");		\
++      asm volatile ("vldr  d7,=8" : : : "d7");		\
++      asm volatile ("vldr  d8,=9" : : : "d8");		\
++      asm volatile ("vldr  d9,=10" : : : "d9");		\
++      asm volatile ("vldr d10,=11" : : : "d10");	\
++      asm volatile ("vldr d11,=12" : : : "d11");	\
++      asm volatile ("vldr d12,=13" : : : "d12");	\
++      asm volatile ("vldr d13,=14" : : : "d13");	\
++      asm volatile ("vldr d14,=15" : : : "d14");	\
++      asm volatile ("vldr d15,=16" : : : "d15");	\
++    }							\
++  if (hwcap & HWCAP_ARM_VFPD32)				\
++    {							\
++      SAVE_VFP_D32					\
++    }
++
++# define VFP_STACK_REQ (16*8)
++# if __BYTE_ORDER == __BIG_ENDIAN
++#  define DISP 7
++# else
++#  define DISP 0
++# endif
++
++# ifdef HAVE_ARM_PCS_VFP_D32
++#  define CHECK_VFP_D32							\
++      char vfp[VFP_STACK_REQ];						\
++      asm volatile ("vstmia %0, {d16-d31}\n"				\
++		    :							\
++		    : "r" (vfp)						\
++		    : "memory");					\
++									\
++      char expected[VFP_STACK_REQ] = { 0 };				\
++      for (int i = 0; i < 16; ++i)					\
++	expected[i * 8 + DISP] = i + 17;				\
++									\
++      if (memcmp (vfp, expected, VFP_STACK_REQ) != 0)			\
++        abort ();
++# else
++#  define CHECK_VFP_D32
++# endif
++
++# define AFTER_TLSDESC_CALL()						\
++  if (hwcap & HWCAP_ARM_VFP)						\
++    {									\
++      char vfp[VFP_STACK_REQ];						\
++      asm volatile ("vstmia %0, {d0-d15}\n"				\
++		    :							\
++		    : "r" (vfp)						\
++		    : "memory");					\
++									\
++      char expected[VFP_STACK_REQ] = { 0 };				\
++      for (int i = 0; i < 16; ++i)					\
++	expected[i * 8 + DISP] = i + 1;					\
++									\
++      if (memcmp (vfp, expected, VFP_STACK_REQ) != 0)			\
++        abort ();							\
++    }									\
++  if (hwcap & HWCAP_ARM_VFPD32)						\
++    {									\
++      CHECK_VFP_D32							\
++    }
++
++#endif /* __SOFTFP__ */
++
++#include_next <tst-gnu2-tls2.h>
+
+commit aded2fc004e7ee85cf0b45b1382552d41e555a23
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Tue Mar 12 13:21:20 2024 -0300
+
+    elf: Enable TLS descriptor tests on aarch64
+    
+    The aarch64 uses 'trad' for traditional tls and 'desc' for tls
+    descriptors, but unlike other targets it defaults to 'desc'.  The
+    -mtls-dialect=gnu2 configure check does not set aarch64 as an ABI that
+    uses TLS descriptors, which then disables some tests.
+    
+    Also rename the internal machinery from gnu2 to tls descriptors.
+    
+    Checked on aarch64-linux-gnu.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 3d53d18fc71c5d9ef4773b8bce04d54b80181926)
+
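+    As a derived illustration (not part of the upstream commit message):
+    with the hunks below, an aarch64 build records
+
+        have-mtls-descriptor = desc
+
+    in config.make, so the TLS descriptor tests are compiled with
+    -mtls-dialect=desc, while other targets keep -mtls-dialect=gnu2.
+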
+diff --git a/configure b/configure
+index 117b48a421..432e40a592 100755
+--- a/configure
++++ b/configure
+@@ -653,7 +653,7 @@ LIBGD
+ libc_cv_cc_loop_to_function
+ libc_cv_cc_submachine
+ libc_cv_cc_nofma
+-libc_cv_mtls_dialect_gnu2
++libc_cv_mtls_descriptor
+ libc_cv_has_glob_dat
+ libc_cv_fpie
+ libc_cv_z_execstack
+@@ -4760,6 +4760,9 @@ libc_config_ok=no
+ # whether to use such directories.
+ with_fp_cond=1
+ 
++# A preconfigure script may define another name for the TLS descriptor variant
++mtls_descriptor=gnu2
++
+ if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
+ then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
+@@ -7006,9 +7009,9 @@ fi
+ printf "%s\n" "$libc_cv_has_glob_dat" >&6; }
+ 
+ 
+-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5
+-printf %s "checking for -mtls-dialect=gnu2... " >&6; }
+-if test ${libc_cv_mtls_dialect_gnu2+y}
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5
++printf %s "checking for tls descriptor support... " >&6; }
++if test ${libc_cv_mtls_descriptor+y}
+ then :
+   printf %s "(cached) " >&6
+ else $as_nop
+@@ -7019,7 +7022,7 @@ void foo (void)
+   i = 10;
+ }
+ EOF
+-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
++if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
+ 		   -shared conftest.c -o conftest 1>&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+@@ -7027,17 +7030,17 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nost
+   printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+   test $ac_status = 0; }; }
+ then
+-  libc_cv_mtls_dialect_gnu2=yes
++  libc_cv_mtls_descriptor=$mtls_descriptor
+ else
+-  libc_cv_mtls_dialect_gnu2=no
++  libc_cv_mtls_descriptor=no
+ fi
+ rm -f conftest*
+ fi
+-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5
+-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; }
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5
++printf "%s\n" "$libc_cv_mtls_descriptor" >&6; }
+ 
+ config_vars="$config_vars
+-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2"
++have-mtls-descriptor = $libc_cv_mtls_descriptor"
+ 
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5
+ printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; }
+diff --git a/configure.ac b/configure.ac
+index 19b88a47a5..bdc385d03c 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -442,6 +442,9 @@ libc_config_ok=no
+ # whether to use such directories.
+ with_fp_cond=1
+ 
++# A preconfigure script may define another name for the TLS descriptor variant
++mtls_descriptor=gnu2
++
+ dnl Let sysdeps/*/preconfigure act here.
+ LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
+ 
+@@ -1287,7 +1290,7 @@ fi
+ rm -f conftest*])
+ AC_SUBST(libc_cv_has_glob_dat)
+ 
+-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2,
++AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor,
+ [dnl
+ cat > conftest.c <<EOF
+ __thread int i;
+@@ -1296,16 +1299,16 @@ void foo (void)
+   i = 10;
+ }
+ EOF
+-if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
++if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
+ 		   -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
+ then
+-  libc_cv_mtls_dialect_gnu2=yes
++  libc_cv_mtls_descriptor=$mtls_descriptor
+ else
+-  libc_cv_mtls_dialect_gnu2=no
++  libc_cv_mtls_descriptor=no
+ fi
+ rm -f conftest*])
+-AC_SUBST(libc_cv_mtls_dialect_gnu2)
+-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2])
++AC_SUBST(libc_cv_mtls_descriptor)
++LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor])
+ 
+ dnl clang emits an warning for a double alias redirection, to warn the
+ dnl original symbol is sed even when weak definition overrides it.
+diff --git a/elf/Makefile b/elf/Makefile
+index 030db4d207..69aa423c4b 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -999,13 +999,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\
+ # For +depfiles in Makerules.
+ extra-test-objs += tst-auditmod17.os
+ 
+-ifeq (yes,$(have-mtls-dialect-gnu2))
++ifneq (no,$(have-mtls-descriptor))
+ tests += tst-gnu2-tls1
+ modules-names += tst-gnu2-tls1mod
+ $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
+ tst-gnu2-tls1mod.so-no-z-defs = yes
+-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
+-endif # $(have-mtls-dialect-gnu2)
++CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor)
++endif # $(have-mtls-descriptor)
+ 
+ ifeq (yes,$(have-protected-data))
+ modules-names += tst-protected1moda tst-protected1modb
+@@ -2972,11 +2972,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \
+ $(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
+ 			    $(objpfx)tst-audit-tlsdesc-mod2.so \
+ 			    $(shared-thread-library)
+-ifeq (yes,$(have-mtls-dialect-gnu2))
++ifneq (no,$(have-mtls-descriptor))
+ # The test is valid for all TLS types, but we want to exercise GNU2
+ # TLS if possible.
+-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
+-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
++CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
+ endif
+ $(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
+ $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
+@@ -3055,11 +3055,11 @@ $(objpfx)tst-gnu2-tls2.out: \
+   $(objpfx)tst-gnu2-tls2mod1.so \
+   $(objpfx)tst-gnu2-tls2mod2.so
+ 
+-ifeq (yes,$(have-mtls-dialect-gnu2))
+-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
+-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
+-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
+-CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
+-CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
+-CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
++ifneq (no,$(have-mtls-descriptor))
++CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
+ endif
+diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
+index d9bd1f8558..19657b627b 100644
+--- a/sysdeps/aarch64/preconfigure
++++ b/sysdeps/aarch64/preconfigure
+@@ -2,5 +2,6 @@ case "$machine" in
+ aarch64*)
+ 	base_machine=aarch64
+ 	machine=aarch64
++	mtls_descriptor=desc
+ 	;;
+ esac
+diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile
+index d5cea717a9..619474eca9 100644
+--- a/sysdeps/arm/Makefile
++++ b/sysdeps/arm/Makefile
+@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os
+ lib-noranlib: $(objpfx)libgcc-stubs.a
+ 
+ ifeq ($(build-shared),yes)
+-ifeq (yes,$(have-mtls-dialect-gnu2))
++ifneq (no,$(have-mtls-descriptor))
+ tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy
+ modules-names += tst-armtlsdesclocmod
+ modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod
+ CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic=
+ CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic=
+-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2
+-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2
+-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2
++CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor)
++CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor)
+ LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now
+ tst-armtlsdescloc-ENV = LD_BIND_NOW=1
+ tst-armtlsdescextnow-ENV = LD_BIND_NOW=1
+
+commit 5a461f2949ded98d8211939f84988bc464c7b4fe
+Author: Andreas Schwab <schwab@suse.de>
+Date:   Tue Mar 19 13:49:50 2024 +0100
+
+    Add tst-gnu2-tls2mod1 to test-internal-extras
+    
+    That allows sysdeps/x86_64/tst-gnu2-tls2mod1.S to use internal headers.
+    
+    Fixes: 717ebfa85c ("x86-64: Allocate state buffer space for RDI, RSI and RBX")
+    (cherry picked from commit fd7ee2e6c5eb49e4a630a9978b4d668bff6354ee)
+
+diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
+index e8babc9a4e..9d374a3299 100644
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
+ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
+ endif
+ 
++test-internal-extras += tst-gnu2-tls2mod1
++
+ endif # $(subdir) == elf
+ 
+ ifeq ($(subdir),csu)
+
+commit aa4249266e9906c4bc833e4847f4d8feef59504f
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Thu Feb 8 10:08:38 2024 -0300
+
+    x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
+    
+    The REP MOVSB usage on memcpy/memmove does not show much performance
+    improvement on Zen3/Zen4 cores compared to the vectorized loops.  Also,
+    as reported in BZ 30994, if the source is aligned and the destination
+    is not, the performance can be 20x slower.
+    
+    The performance difference is noticeable with small buffer sizes, close
+    to the lower bound where memcpy/memmove starts to use ERMS.  The
+    performance of REP MOVSB is similar to that of the vectorized loops at
+    the upper size limit (the L2 cache).  Also, there is no drawback to
+    multiple cores sharing the cache.
+    
+    Checked on x86_64-linux-gnu on Zen3.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e)
+
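+    An illustrative summary derived from the hunks below (not upstream
+    wording): with both rep_movsb_threshold and rep_movsb_stop_threshold
+    set to non_temporal_threshold on AMD, the selection window
+
+        rep_movsb_threshold <= size < rep_movsb_stop_threshold
+
+    is empty, so memcpy/memmove stay on the vectorized loops below the
+    non-temporal threshold.
+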
+diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
+index d5101615e3..f34d12846c 100644
+--- a/sysdeps/x86/dl-cacheinfo.h
++++ b/sysdeps/x86/dl-cacheinfo.h
+@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+   long int data = -1;
+   long int shared = -1;
+   long int shared_per_thread = -1;
+-  long int core = -1;
+   unsigned int threads = 0;
+   unsigned long int level1_icache_size = -1;
+   unsigned long int level1_icache_linesize = -1;
+@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+   if (cpu_features->basic.kind == arch_kind_intel)
+     {
+       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
+-      core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+       shared_per_thread = shared;
+ 
+@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+ 	= handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
+       level1_dcache_linesize
+ 	= handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
+-      level2_cache_size = core;
++      level2_cache_size
++	= handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+       level2_cache_assoc
+ 	= handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
+       level2_cache_linesize
+@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+       level4_cache_size
+ 	= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
+ 
+-      get_common_cache_info (&shared, &shared_per_thread, &threads, core);
++      get_common_cache_info (&shared, &shared_per_thread, &threads,
++			     level2_cache_size);
+     }
+   else if (cpu_features->basic.kind == arch_kind_zhaoxin)
+     {
+       data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
+-      core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
+       shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
+       shared_per_thread = shared;
+ 
+@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+       level1_dcache_size = data;
+       level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
+       level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
+-      level2_cache_size = core;
++      level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
+       level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
+       level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
+       level3_cache_size = shared;
+       level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
+       level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
+ 
+-      get_common_cache_info (&shared, &shared_per_thread, &threads, core);
++      get_common_cache_info (&shared, &shared_per_thread, &threads,
++			     level2_cache_size);
+     }
+   else if (cpu_features->basic.kind == arch_kind_amd)
+     {
+       data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
+-      core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+ 
+       level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
+@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+       level1_dcache_size = data;
+       level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
+       level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
+-      level2_cache_size = core;
++      level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);;
+       level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
+       level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
+       level3_cache_size = shared;
+@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+       if (shared <= 0)
+         {
+            /* No shared L3 cache.  All we have is the L2 cache.  */
+-           shared = core;
++           shared = level2_cache_size;
+         }
+       else if (cpu_features->basic.family < 0x17)
+         {
+            /* Account for exclusive L2 and L3 caches.  */
+-           shared += core;
++           shared += level2_cache_size;
+         }
+ 
+       shared_per_thread = shared;
+@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+   if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
+     rep_movsb_threshold = 2112;
+ 
++   /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
++      cases slower than the vectorized path (and for some alignments,
++      it is really slow, check BZ #30994).  */
++  if (cpu_features->basic.kind == arch_kind_amd)
++    rep_movsb_threshold = non_temporal_threshold;
++
+   /* The default threshold to use Enhanced REP STOSB.  */
+   unsigned long int rep_stosb_threshold = 2048;
+ 
+@@ -1028,16 +1033,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+ 			   SIZE_MAX);
+ 
+   unsigned long int rep_movsb_stop_threshold;
+-  /* ERMS feature is implemented from AMD Zen3 architecture and it is
+-     performing poorly for data above L2 cache size. Henceforth, adding
+-     an upper bound threshold parameter to limit the usage of Enhanced
+-     REP MOVSB operations and setting its value to L2 cache size.  */
+-  if (cpu_features->basic.kind == arch_kind_amd)
+-    rep_movsb_stop_threshold = core;
+   /* Setting the upper bound of ERMS to the computed value of
+-     non-temporal threshold for architectures other than AMD.  */
+-  else
+-    rep_movsb_stop_threshold = non_temporal_threshold;
++     non-temporal threshold for all architectures.  */
++  rep_movsb_stop_threshold = non_temporal_threshold;
+ 
+   cpu_features->data_cache_size = data;
+   cpu_features->shared_cache_size = shared;
+
+commit 6484a92698039c4a7a510f0214e22d067b0d78b3
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Thu Feb 8 10:08:39 2024 -0300
+
+    x86: Do not prefer ERMS for memset on Zen3+
+    
+    For AMD Zen3+ architecture, the performance of the vectorized loop is
+    slightly better than ERMS.
+    
+    Checked on x86_64-linux-gnu on Zen3.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567)
+
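+    For illustration (derived from the hunk below, not upstream wording):
+    the SIZE_MAX default is only applied when the tunable is left unset,
+    so REP STOSB can still be requested on AMD explicitly, e.g.
+
+        GLIBC_TUNABLES=glibc.cpu.x86_rep_stosb_threshold=2048 ./app
+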
+diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
+index f34d12846c..5a98f70364 100644
+--- a/sysdeps/x86/dl-cacheinfo.h
++++ b/sysdeps/x86/dl-cacheinfo.h
+@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+      minimum value is fixed.  */
+   rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
+ 				     long int, NULL);
++  if (cpu_features->basic.kind == arch_kind_amd
++      && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
++    /* For AMD Zen3+ architecture, the performance of the vectorized loop is
++       slightly better than ERMS.  */
++    rep_stosb_threshold = SIZE_MAX;
+ 
+   TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
+   TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
+
+commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Thu Feb 8 10:08:40 2024 -0300
+
+    x86: Expand the comment on when REP STOSB is used on memset
+    
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    (cherry picked from commit 491e55beab7457ed310a4a47496f4a333c5d1032)
+
+diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+index 9984c3ca0f..97839a2248 100644
+--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+@@ -21,7 +21,9 @@
+    2. If size is less than VEC, use integer register stores.
+    3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
+    4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
+-   5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
++   5. On machines with the ERMS feature, if size is greater than or equal
++      to __x86_rep_stosb_threshold then REP STOSB will be used.
++   6. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+       4 VEC stores and store 4 * VEC at a time until done.  */
+ 
+ #include <sysdep.h>
+
+commit 31c7d69af59da0da80caa74b2ec6ae149013384d
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Fri Feb 16 07:40:37 2024 +0100
+
+    i386: Use generic memrchr in libc (bug 31316)
+    
+    Before this change, we incorrectly used the SSE2 variant in the
+    implementation, without checking that the system actually supports
+    SSE2.
+    
+    Tested-by: Sam James <sam@gentoo.org>
+    (cherry picked from commit 0d9166c2245cad4ac520b337dee40c9a583872b6)
+
+diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
+index ef7bbbe792..20bfdf3af3 100644
+--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
++++ b/sysdeps/i386/i686/multiarch/memrchr-c.c
+@@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t);
+ #endif
+ 
+ #include "string/memrchr.c"
++strong_alias (__memrchr_ia32, __GI___memrchr)
+diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
+index d9dae04171..e123f87435 100644
+--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S
++++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
+@@ -720,5 +720,4 @@ L(ret_null):
+ 	ret
+ 
+ END (__memrchr_sse2)
+-strong_alias (__memrchr_sse2, __GI___memrchr)
+ #endif
+
+commit b0e0a07018098c2c5927796be5681a298c312626
+Author: Joe Ramsay <Joe.Ramsay@arm.com>
+Date:   Tue Feb 20 16:44:13 2024 +0000
+
+    aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR
+    
+    This includes a fix for big-endian in AdvSIMD log, some cosmetic
+    changes, and numerous small optimisations mainly around inlining and
+    using indexed variants of MLA intrinsics.
+    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+    
+    (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e)
+
+diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
+index a8eabb5e71..0a86c9823a 100644
+--- a/sysdeps/aarch64/fpu/acos_advsimd.c
++++ b/sysdeps/aarch64/fpu/acos_advsimd.c
+@@ -40,8 +40,8 @@ static const struct data
+ };
+ 
+ #define AllMask v_u64 (0xffffffffffffffff)
+-#define Oneu (0x3ff0000000000000)
+-#define Small (0x3e50000000000000) /* 2^-53.  */
++#define Oneu 0x3ff0000000000000
++#define Small 0x3e50000000000000 /* 2^-53.  */
+ 
+ #if WANT_SIMD_EXCEPT
+ static float64x2_t VPCS_ATTR NOINLINE
+diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c
+index 141646e954..2de6eff407 100644
+--- a/sysdeps/aarch64/fpu/asin_advsimd.c
++++ b/sysdeps/aarch64/fpu/asin_advsimd.c
+@@ -39,8 +39,8 @@ static const struct data
+ };
+ 
+ #define AllMask v_u64 (0xffffffffffffffff)
+-#define One (0x3ff0000000000000)
+-#define Small (0x3e50000000000000) /* 2^-12.  */
++#define One 0x3ff0000000000000
++#define Small 0x3e50000000000000 /* 2^-12.  */
+ 
+ #if WANT_SIMD_EXCEPT
+ static float64x2_t VPCS_ATTR NOINLINE
+diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
+index 09a4c559b8..04fa71fa37 100644
+--- a/sysdeps/aarch64/fpu/atan2_sve.c
++++ b/sysdeps/aarch64/fpu/atan2_sve.c
+@@ -37,9 +37,6 @@ static const struct data
+   .pi_over_2 = 0x1.921fb54442d18p+0,
+ };
+ 
+-/* Useful constants.  */
+-#define SignMask sv_u64 (0x8000000000000000)
+-
+ /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
+ static svfloat64_t NOINLINE
+ special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
+@@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+   svbool_t cmp_y = zeroinfnan (iy, pg);
+   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+ 
+-  svuint64_t sign_x = svand_x (pg, ix, SignMask);
+-  svuint64_t sign_y = svand_x (pg, iy, SignMask);
+-  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
+-
+   svfloat64_t ax = svabs_x (pg, x);
+   svfloat64_t ay = svabs_x (pg, y);
++  svuint64_t iax = svreinterpret_u64 (ax);
++  svuint64_t iay = svreinterpret_u64 (ay);
++
++  svuint64_t sign_x = sveor_x (pg, ix, iax);
++  svuint64_t sign_y = sveor_x (pg, iy, iay);
++  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
+ 
+-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
+   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+ 
+   /* Set up z for call to atan.  */
+@@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+   svfloat64_t z = svdiv_x (pg, n, d);
+ 
+   /* Work out the correct shift.  */
+-  svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
+-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
++  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
++  shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
++  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
+   shift = svmul_x (pg, shift, data_ptr->pi_over_2);
+ 
+   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
+@@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+   ret = svadd_m (pg, ret, shift);
+ 
+   /* Account for the sign of x and y.  */
+-  ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
+-
+   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+-    return special_case (y, x, ret, cmp_xy);
+-
+-  return ret;
++    return special_case (
++	y, x,
++	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
++	cmp_xy);
++  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
+ }
+diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
+index b92f83cdea..9ea197147c 100644
+--- a/sysdeps/aarch64/fpu/atan2f_sve.c
++++ b/sysdeps/aarch64/fpu/atan2f_sve.c
+@@ -32,10 +32,8 @@ static const struct data
+   .pi_over_2 = 0x1.921fb6p+0f,
+ };
+ 
+-#define SignMask sv_u32 (0x80000000)
+-
+ /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
+-static inline svfloat32_t
++static svfloat32_t NOINLINE
+ special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
+ 	      const svbool_t cmp)
+ {
+@@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+   svbool_t cmp_y = zeroinfnan (iy, pg);
+   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+ 
+-  svuint32_t sign_x = svand_x (pg, ix, SignMask);
+-  svuint32_t sign_y = svand_x (pg, iy, SignMask);
+-  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
+-
+   svfloat32_t ax = svabs_x (pg, x);
+   svfloat32_t ay = svabs_x (pg, y);
++  svuint32_t iax = svreinterpret_u32 (ax);
++  svuint32_t iay = svreinterpret_u32 (ay);
++
++  svuint32_t sign_x = sveor_x (pg, ix, iax);
++  svuint32_t sign_y = sveor_x (pg, iy, iay);
++  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
+ 
+-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
+   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+ 
+   /* Set up z for call to atan.  */
+@@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+   svfloat32_t z = svdiv_x (pg, n, d);
+ 
+   /* Work out the correct shift.  */
+-  svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
+-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
++  svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
++  shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
++  shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
+   shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
+ 
+-  /* Use split Estrin scheme for P(z^2) with deg(P)=7.  */
++  /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
+   svfloat32_t z2 = svmul_x (pg, z, z);
+   svfloat32_t z4 = svmul_x (pg, z2, z2);
+   svfloat32_t z8 = svmul_x (pg, z4, z4);
+@@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+   ret = svadd_m (pg, ret, shift);
+ 
+   /* Account for the sign of x and y.  */
+-  ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
+ 
+   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+-    return special_case (y, x, ret, cmp_xy);
++    return special_case (
++	y, x,
++	svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
++	cmp_xy);
+ 
+-  return ret;
++  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
+ }
+diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c
+index 2897e8b909..3924c9ce44 100644
+--- a/sysdeps/aarch64/fpu/cos_advsimd.c
++++ b/sysdeps/aarch64/fpu/cos_advsimd.c
+@@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
+        special-case handler later.  */
+     r = vbslq_f64 (cmp, v_f64 (1.0), r);
+ #else
+-  cmp = vcageq_f64 (d->range_val, x);
+-  cmp = vceqzq_u64 (cmp); /* cmp = ~cmp.  */
++  cmp = vcageq_f64 (x, d->range_val);
+   r = x;
+ #endif
+ 
+diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c
+index 60abc8dfcf..d0c285b03a 100644
+--- a/sysdeps/aarch64/fpu/cosf_advsimd.c
++++ b/sysdeps/aarch64/fpu/cosf_advsimd.c
+@@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
+        special-case handler later.  */
+     r = vbslq_f32 (cmp, v_f32 (1.0f), r);
+ #else
+-  cmp = vcageq_f32 (d->range_val, x);
+-  cmp = vceqzq_u32 (cmp); /* cmp = ~cmp.  */
++  cmp = vcageq_f32 (x, d->range_val);
+   r = x;
+ #endif
+ 
+diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c
+index fe7149b191..eeb31ca839 100644
+--- a/sysdeps/aarch64/fpu/exp10_advsimd.c
++++ b/sysdeps/aarch64/fpu/exp10_advsimd.c
+@@ -57,7 +57,7 @@ const static struct data
+ # define BigBound v_u64 (0x4070000000000000)  /* asuint64 (0x1p8).  */
+ # define Thres v_u64 (0x2070000000000000)     /* BigBound - TinyBound.  */
+ 
+-static inline float64x2_t VPCS_ATTR
++static float64x2_t VPCS_ATTR NOINLINE
+ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+ {
+   /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+@@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+ # define SpecialBias1 v_u64 (0x7000000000000000)  /* 0x1p769.  */
+ # define SpecialBias2 v_u64 (0x3010000000000000)  /* 0x1p-254.  */
+ 
+-static float64x2_t VPCS_ATTR NOINLINE
++static inline float64x2_t VPCS_ATTR
+ special_case (float64x2_t s, float64x2_t y, float64x2_t n,
+ 	      const struct data *d)
+ {
+diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c
+index 7ee0c90948..ab117b69da 100644
+--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c
++++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c
+@@ -25,7 +25,8 @@
+ static const struct data
+ {
+   float32x4_t poly[5];
+-  float32x4_t shift, log10_2, log2_10_hi, log2_10_lo;
++  float32x4_t log10_2_and_inv, shift;
++
+ #if !WANT_SIMD_EXCEPT
+   float32x4_t scale_thresh;
+ #endif
+@@ -38,9 +39,9 @@ static const struct data
+   .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
+ 	    V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
+   .shift = V4 (0x1.8p23f),
+-  .log10_2 = V4 (0x1.a934fp+1),
+-  .log2_10_hi = V4 (0x1.344136p-2),
+-  .log2_10_lo = V4 (-0x1.ec10cp-27),
++
++  /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0.  */
++  .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
+ #if !WANT_SIMD_EXCEPT
+   .scale_thresh = V4 (ScaleBound)
+ #endif
+@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
+ #if WANT_SIMD_EXCEPT
+   /* asuint(x) - TinyBound >= BigBound - TinyBound.  */
+   uint32x4_t cmp = vcgeq_u32 (
+-      vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
+-		 TinyBound),
+-      Thres);
++      vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
+   float32x4_t xm = x;
+   /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+      special case handler to fix special lanes later. This is only necessary if
+      fenv exceptions are to be triggered correctly.  */
+   if (__glibc_unlikely (v_any_u32 (cmp)))
+-    x = vbslq_f32 (cmp, v_f32 (1), x);
++    x = v_zerofy_f32 (x, cmp);
+ #endif
+ 
+   /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
+      with poly(r) in [1/sqrt(2), sqrt(2)] and
+      x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2].  */
+-  float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2);
++  float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
+   float32x4_t n = vsubq_f32 (z, d->shift);
+-  float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi);
+-  r = vfmsq_f32 (r, n, d->log2_10_lo);
++  float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
++  r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
+   uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+ 
+   float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c
+index 391a93180c..ae1e63d503 100644
+--- a/sysdeps/aarch64/fpu/exp2_advsimd.c
++++ b/sysdeps/aarch64/fpu/exp2_advsimd.c
+@@ -24,6 +24,7 @@
+ #define IndexMask (N - 1)
+ #define BigBound 1022.0
+ #define UOFlowBound 1280.0
++#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
+ 
+ static const struct data
+ {
+@@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i)
+ 
+ #if WANT_SIMD_EXCEPT
+ 
+-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
+ # define Thres 0x2080000000000000     /* asuint64(512.0) - TinyBound.  */
+ 
+ /* Call scalar exp2 as a fallback.  */
+ static float64x2_t VPCS_ATTR NOINLINE
+-special_case (float64x2_t x)
++special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
+ {
+-  return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff));
++  return v_call_f64 (exp2, x, y, is_special);
+ }
+ 
+ #else
+@@ -65,7 +65,7 @@ special_case (float64x2_t x)
+ # define SpecialBias1 0x7000000000000000 /* 0x1p769.  */
+ # define SpecialBias2 0x3010000000000000 /* 0x1p-254.  */
+ 
+-static float64x2_t VPCS_ATTR
++static inline float64x2_t VPCS_ATTR
+ special_case (float64x2_t s, float64x2_t y, float64x2_t n,
+ 	      const struct data *d)
+ {
+@@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
+ #if WANT_SIMD_EXCEPT
+   uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+   cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
+-  /* If any special case (inf, nan, small and large x) is detected,
+-     fall back to scalar for all lanes.  */
+-  if (__glibc_unlikely (v_any_u64 (cmp)))
+-    return special_case (x);
++  /* Mask special lanes and retain a copy of x for passing to special-case
++     handler.  */
++  float64x2_t xc = x;
++  x = v_zerofy_f64 (x, cmp);
+ #else
+   cmp = vcagtq_f64 (x, d->scale_big_bound);
+ #endif
+@@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
+   float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
+   y = vmulq_f64 (r, y);
+ 
+-#if !WANT_SIMD_EXCEPT
+   if (__glibc_unlikely (v_any_u64 (cmp)))
++#if !WANT_SIMD_EXCEPT
+     return special_case (s, y, n, d);
++#else
++    return special_case (xc, vfmaq_f64 (s, s, y), cmp);
+ #endif
+   return vfmaq_f64 (s, s, y);
+ }
+diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
+index 9a5a523a10..8a686e3e05 100644
+--- a/sysdeps/aarch64/fpu/exp2f_sve.c
++++ b/sysdeps/aarch64/fpu/exp2f_sve.c
+@@ -20,6 +20,8 @@
+ #include "sv_math.h"
+ #include "poly_sve_f32.h"
+ 
++#define Thres 0x1.5d5e2ap+6f
++
+ static const struct data
+ {
+   float poly[5];
+@@ -33,7 +35,7 @@ static const struct data
+   .shift = 0x1.903f8p17f,
+   /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+      correctly by FEXPA.  */
+-  .thres = 0x1.5d5e2ap+6f,
++  .thres = Thres,
+ };
+ 
+ static svfloat32_t NOINLINE
+diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c
+index fd215f1d2c..5e3a9a0d44 100644
+--- a/sysdeps/aarch64/fpu/exp_advsimd.c
++++ b/sysdeps/aarch64/fpu/exp_advsimd.c
+@@ -54,7 +54,7 @@ const static volatile struct
+ # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9).  */
+ # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound.  */
+ 
+-static inline float64x2_t VPCS_ATTR
++static float64x2_t VPCS_ATTR NOINLINE
+ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+ {
+   /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+@@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+ # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769.  */
+ # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254.  */
+ 
+-static float64x2_t VPCS_ATTR NOINLINE
++static inline float64x2_t VPCS_ATTR
+ special_case (float64x2_t s, float64x2_t y, float64x2_t n)
+ {
+   /* 2^(n/N) may overflow, break it up into s1*s2.  */
+diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
+index 0b85bd06f3..3628398674 100644
+--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
++++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
+@@ -23,7 +23,7 @@
+ static const struct data
+ {
+   float64x2_t poly[11];
+-  float64x2_t invln2, ln2_lo, ln2_hi, shift;
++  float64x2_t invln2, ln2, shift;
+   int64x2_t exponent_bias;
+ #if WANT_SIMD_EXCEPT
+   uint64x2_t thresh, tiny_bound;
+@@ -38,8 +38,7 @@ static const struct data
+ 	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ 	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
+   .invln2 = V2 (0x1.71547652b82fep0),
+-  .ln2_hi = V2 (0x1.62e42fefa39efp-1),
+-  .ln2_lo = V2 (0x1.abc9e3b39803fp-56),
++  .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
+   .shift = V2 (0x1.8p52),
+   .exponent_bias = V2 (0x3ff0000000000000),
+ #if WANT_SIMD_EXCEPT
+@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
+     x = v_zerofy_f64 (x, special);
+ #else
+   /* Large input, NaNs and Infs.  */
+-  uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound));
++  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
+ #endif
+ 
+   /* Reduce argument to smaller range:
+@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
+      where 2^i is exact because i is an integer.  */
+   float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
+   int64x2_t i = vcvtq_s64_f64 (n);
+-  float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi);
+-  f = vfmsq_f64 (f, n, d->ln2_lo);
++  float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
++  f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
+ 
+   /* Approximate expm1(f) using polynomial.
+      Taylor expansion for expm1(x) has the form:
+diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
+index 8d4c9a2193..93db200f61 100644
+--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
++++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
+@@ -23,7 +23,8 @@
+ static const struct data
+ {
+   float32x4_t poly[5];
+-  float32x4_t invln2, ln2_lo, ln2_hi, shift;
++  float32x4_t invln2_and_ln2;
++  float32x4_t shift;
+   int32x4_t exponent_bias;
+ #if WANT_SIMD_EXCEPT
+   uint32x4_t thresh;
+@@ -34,9 +35,8 @@ static const struct data
+   /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2].  */
+   .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
+ 	    V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
+-  .invln2 = V4 (0x1.715476p+0f),
+-  .ln2_hi = V4 (0x1.62e4p-1f),
+-  .ln2_lo = V4 (0x1.7f7d1cp-20f),
++  /* Stores constants: invln2, ln2_hi, ln2_lo, 0.  */
++  .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
+   .shift = V4 (0x1.8p23f),
+   .exponent_bias = V4 (0x3f800000),
+ #if !WANT_SIMD_EXCEPT
+@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
+     x = v_zerofy_f32 (x, special);
+ #else
+   /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
+-  uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound));
++  uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
+ #endif
+ 
+   /* Reduce argument to smaller range:
+@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
+      and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+      exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+      where 2^i is exact because i is an integer.  */
+-  float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift);
++  float32x4_t j = vsubq_f32 (
++      vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
+   int32x4_t i = vcvtq_s32_f32 (j);
+-  float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi);
+-  f = vfmsq_f32 (f, j, d->ln2_lo);
++  float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
++  f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
+ 
+   /* Approximate expm1(f) using polynomial.
+      Taylor expansion for expm1(x) has the form:
+diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
+index 067ae79613..21df61728c 100644
+--- a/sysdeps/aarch64/fpu/log_advsimd.c
++++ b/sysdeps/aarch64/fpu/log_advsimd.c
+@@ -58,8 +58,13 @@ lookup (uint64x2_t i)
+   uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+   float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+   float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
++#if __BYTE_ORDER == __LITTLE_ENDIAN
+   e.invc = vuzp1q_f64 (e0, e1);
+   e.logc = vuzp2q_f64 (e0, e1);
++#else
++  e.invc = vuzp1q_f64 (e1, e0);
++  e.logc = vuzp2q_f64 (e1, e0);
++#endif
+   return e;
+ }
+ 
+diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c
+index efce183e86..a0d9d3b819 100644
+--- a/sysdeps/aarch64/fpu/sin_advsimd.c
++++ b/sysdeps/aarch64/fpu/sin_advsimd.c
+@@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
+   r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x);
+ #else
+   r = x;
+-  cmp = vcageq_f64 (d->range_val, x);
+-  cmp = vceqzq_u64 (cmp); /* cmp = ~cmp.  */
++  cmp = vcageq_f64 (x, d->range_val);
+ #endif
+ 
+   /* n = rint(|x|/pi).  */
+diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c
+index 60cf3f2ca1..375dfc3331 100644
+--- a/sysdeps/aarch64/fpu/sinf_advsimd.c
++++ b/sysdeps/aarch64/fpu/sinf_advsimd.c
+@@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
+   r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
+ #else
+   r = x;
+-  cmp = vcageq_f32 (d->range_val, x);
+-  cmp = vceqzq_u32 (cmp); /* cmp = ~cmp.  */
++  cmp = vcageq_f32 (x, d->range_val);
+ #endif
+ 
+   /* n = rint(|x|/pi) */
+diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c
+index d7e5ba7b1a..0459821ab2 100644
+--- a/sysdeps/aarch64/fpu/tan_advsimd.c
++++ b/sysdeps/aarch64/fpu/tan_advsimd.c
+@@ -23,7 +23,7 @@
+ static const struct data
+ {
+   float64x2_t poly[9];
+-  float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift;
++  float64x2_t half_pi, two_over_pi, shift;
+ #if !WANT_SIMD_EXCEPT
+   float64x2_t range_val;
+ #endif
+@@ -34,8 +34,7 @@ static const struct data
+ 	    V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9),
+ 	    V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11),
+ 	    V2 (0x1.4e4fd14147622p-12) },
+-  .half_pi_hi = V2 (0x1.921fb54442d18p0),
+-  .half_pi_lo = V2 (0x1.1a62633145c07p-54),
++  .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
+   .two_over_pi = V2 (0x1.45f306dc9c883p-1),
+   .shift = V2 (0x1.8p52),
+ #if !WANT_SIMD_EXCEPT
+@@ -56,15 +55,15 @@ special_case (float64x2_t x)
+ 
+ /* Vector approximation for double-precision tan.
+    Maximum measured error is 3.48 ULP:
+-   __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+-				 want -0x1.f6ccd8ecf7deap+37.   */
++   _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
++				      want -0x1.f6ccd8ecf7deap+37.  */
+ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+ {
+   const struct data *dat = ptr_barrier (&data);
+-  /* Our argument reduction cannot calculate q with sufficient accuracy for very
+-     large inputs. Fall back to scalar routine for all lanes if any are too
+-     large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny
+-     input to avoid underflow.  */
++  /* Our argument reduction cannot calculate q with sufficient accuracy for
++     very large inputs. Fall back to scalar routine for all lanes if any are
++     too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
++     tiny input to avoid underflow.  */
+ #if WANT_SIMD_EXCEPT
+   uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+   /* iax - tiny_bound > range_val - tiny_bound.  */
+@@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+   /* Use q to reduce x to r in [-pi/4, pi/4], by:
+      r = x - q * pi/2, in extended precision.  */
+   float64x2_t r = x;
+-  r = vfmsq_f64 (r, q, dat->half_pi_hi);
+-  r = vfmsq_f64 (r, q, dat->half_pi_lo);
++  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0);
++  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1);
+   /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+      formula.  */
+   r = vmulq_n_f64 (r, 0.5);
+@@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+      and reciprocity around pi/2:
+      tan(x) = 1 / (tan(pi/2 - x))
+      to assemble result using change-of-sign and conditional selection of
+-     numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */
++     numerator/denominator, dependent on odd/even-ness of q (hence quadrant).
++   */
+   float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p);
+   float64x2_t d = vaddq_f64 (p, p);
+ 
+   uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
+ 
+ #if !WANT_SIMD_EXCEPT
+-  uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val));
++  uint64x2_t special = vcageq_f64 (x, dat->range_val);
+   if (__glibc_unlikely (v_any_u64 (special)))
+     return special_case (x);
+ #endif
+diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c
+index 1f16103f8a..5a7489390a 100644
+--- a/sysdeps/aarch64/fpu/tanf_advsimd.c
++++ b/sysdeps/aarch64/fpu/tanf_advsimd.c
+@@ -23,7 +23,8 @@
+ static const struct data
+ {
+   float32x4_t poly[6];
+-  float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift;
++  float32x4_t pi_consts;
++  float32x4_t shift;
+ #if !WANT_SIMD_EXCEPT
+   float32x4_t range_val;
+ #endif
+@@ -31,10 +32,9 @@ static const struct data
+   /* Coefficients generated using FPMinimax.  */
+   .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f),
+ 	    V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) },
+-  .neg_half_pi_1 = V4 (-0x1.921fb6p+0f),
+-  .neg_half_pi_2 = V4 (0x1.777a5cp-25f),
+-  .neg_half_pi_3 = V4 (0x1.ee59dap-50f),
+-  .two_over_pi = V4 (0x1.45f306p-1f),
++  /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi.  */
++  .pi_consts
++  = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f },
+   .shift = V4 (0x1.8p+23f),
+ #if !WANT_SIMD_EXCEPT
+   .range_val = V4 (0x1p15f),
+@@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d)
+ {
+   float32x4_t z2 = vmulq_f32 (z, z);
+ #if WANT_SIMD_EXCEPT
+-  /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions
+-     are to be triggered correctly, sidestep this by fixing such lanes to 0.  */
++  /* Tiny z (<= 0x1p-31) will underflow when calculating z^4.
++     If fp exceptions are to be triggered correctly,
++     sidestep this by fixing such lanes to 0.  */
+   uint32x4_t will_uflow
+-    = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
++      = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
+   if (__glibc_unlikely (v_any_u32 (will_uflow)))
+     z2 = vbslq_f32 (will_uflow, v_f32 (0), z2);
+ #endif
+@@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x)
+ #endif
+ 
+   /* n = rint(x/(pi/2)).  */
+-  float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x);
++  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
+   float32x4_t n = vsubq_f32 (q, d->shift);
+   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
+   uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
+ 
+   /* r = x - n * (pi/2)  (range reduction into -pi./4 .. pi/4).  */
+   float32x4_t r;
+-  r = vfmaq_f32 (x, d->neg_half_pi_1, n);
+-  r = vfmaq_f32 (r, d->neg_half_pi_2, n);
+-  r = vfmaq_f32 (r, d->neg_half_pi_3, n);
++  r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
++  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
++  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
+ 
+   /* If x lives in an interval, where |tan(x)|
+      - is finite, then use a polynomial approximation of the form
+
+commit 395a89f61e19fa916ae4cc93fc10d81a28ce3039
+Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date:   Wed Mar 13 14:34:14 2024 +0000
+
+    aarch64: fix check for SVE support in assembler
+    
+    Due to GCC bug 110901, -mcpu can override the -march setting when
+    compiling asm code, and thus a compiler targeting a specific cpu can
+    fail the configure check even when binutils gas supports SVE.
+    
+    The workaround is that an explicit .arch directive overrides both -mcpu
+    and -march, and since that is what the actual SVE memcpy uses, the
+    configure check should use it too, even if the GCC issue is fixed
+    independently.
+    
+    Reviewed-by: Florian Weimer <fweimer@redhat.com>
+    (cherry picked from commit 73c26018ed0ecd9c807bb363cc2c2ab4aca66a82)
+
+diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
+old mode 100644
+new mode 100755
+index ca57edce47..9606137e8d
+--- a/sysdeps/aarch64/configure
++++ b/sysdeps/aarch64/configure
+@@ -325,9 +325,10 @@ then :
+   printf %s "(cached) " >&6
+ else $as_nop
+   cat > conftest.s <<\EOF
+-        ptrue p0.b
++	.arch armv8.2-a+sve
++	ptrue p0.b
+ EOF
+-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5'
++if { ac_try='${CC-cc} -c conftest.s 1>&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
+index 27874eceb4..56d12d661d 100644
+--- a/sysdeps/aarch64/configure.ac
++++ b/sysdeps/aarch64/configure.ac
+@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
+ # Check if asm support armv8.2-a+sve
+ AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl
+ cat > conftest.s <<\EOF
+-        ptrue p0.b
++	.arch armv8.2-a+sve
++	ptrue p0.b
+ EOF
+-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then
++if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
+   libc_cv_aarch64_sve_asm=yes
+ else
+   libc_cv_aarch64_sve_asm=no
+
+commit 9d92452c70805a2e2dbbdb2b1ffc34bd86e1c8df
+Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date:   Thu Mar 21 16:48:33 2024 +0000
+
+    AArch64: Check kernel version for SVE ifuncs
+    
+    Old Linux kernels disable SVE after every system call.  Calling the
+    SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
+    As a result, applications with a high use of syscalls may run slower with
+    the SVE memcpy.  This is true for kernels from 4.15.0 up to, but not
+    including, 6.2.0, except for 5.14.0, which was patched.  Avoid this by
+    checking the kernel version and selecting the SVE ifunc on modern kernels.
+    
+    Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
+    value without calling any library functions.  If uname() is not supported or
+    if the version format is not recognized, assume the kernel is modern.
+    
+    Tested-by: Florian Weimer <fweimer@redhat.com>
+    Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
+    (cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2)
+
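+    Worked example of the version encoding used below (derived, not
+    upstream wording): a release string of "6.1.0" parses to 0x060100,
+    which is below 0x060200 and at least 0x040f00, so the SVE ifuncs are
+    avoided; "5.14.0" (0x050e00) and "6.2.0" (0x060200) are treated as
+    patched/modern and keep the SVE ifuncs.
+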
+diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
+index 77a782422a..5f2da91ebb 100644
+--- a/sysdeps/aarch64/cpu-features.h
++++ b/sysdeps/aarch64/cpu-features.h
+@@ -71,6 +71,7 @@ struct cpu_features
+   /* Currently, the GLIBC memory tagging tunable only defines 8 bits.  */
+   uint8_t mte_state;
+   bool sve;
++  bool prefer_sve_ifuncs;
+   bool mops;
+ };
+ 
+diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
+index c52860efb2..61dc40088f 100644
+--- a/sysdeps/aarch64/multiarch/init-arch.h
++++ b/sysdeps/aarch64/multiarch/init-arch.h
+@@ -36,5 +36,7 @@
+     MTE_ENABLED ();							      \
+   bool __attribute__((unused)) sve =					      \
+     GLRO(dl_aarch64_cpu_features).sve;					      \
++  bool __attribute__((unused)) prefer_sve_ifuncs =			      \
++    GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs;			      \
+   bool __attribute__((unused)) mops =					      \
+     GLRO(dl_aarch64_cpu_features).mops;
+diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
+index d12eccfca5..ce53567dab 100644
+--- a/sysdeps/aarch64/multiarch/memcpy.c
++++ b/sysdeps/aarch64/multiarch/memcpy.c
+@@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
+     {
+       if (IS_A64FX (midr))
+ 	return __memcpy_a64fx;
+-      return __memcpy_sve;
++      return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
+     }
+ 
+   if (IS_THUNDERX (midr))
+diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
+index 2081eeb4d4..fe95037be3 100644
+--- a/sysdeps/aarch64/multiarch/memmove.c
++++ b/sysdeps/aarch64/multiarch/memmove.c
+@@ -47,7 +47,7 @@ select_memmove_ifunc (void)
+     {
+       if (IS_A64FX (midr))
+ 	return __memmove_a64fx;
+-      return __memmove_sve;
++      return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
+     }
+ 
+   if (IS_THUNDERX (midr))
+diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+index b1a3f673f0..c0b047bc0d 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+@@ -21,6 +21,7 @@
+ #include <sys/auxv.h>
+ #include <elf/dl-hwcaps.h>
+ #include <sys/prctl.h>
++#include <sys/utsname.h>
+ #include <dl-tunables-parse.h>
+ 
+ #define DCZID_DZP_MASK (1 << 4)
+@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
+   return UINT64_MAX;
+ }
+ 
++#if __LINUX_KERNEL_VERSION < 0x060200
++
++/* Return true if we prefer using SVE in string ifuncs.  Old kernels disable
++   SVE after every system call which results in unnecessary traps if memcpy
++   uses SVE.  This is true for kernels between 4.15.0 and before 6.2.0, except
++   for 5.14.0 which was patched.  For these versions return false to avoid using
++   SVE ifuncs.
++   Parse the kernel version into a 24-bit kernel.major.minor value without
++   calling any library functions.  If uname() is not supported or if the version
++   format is not recognized, assume the kernel is modern and return true.  */
++
++static inline bool
++prefer_sve_ifuncs (void)
++{
++  struct utsname buf;
++  const char *p = &buf.release[0];
++  int kernel = 0;
++  int val;
++
++  if (__uname (&buf) < 0)
++    return true;
++
++  for (int shift = 16; shift >= 0; shift -= 8)
++    {
++      for (val = 0; *p >= '0' && *p <= '9'; p++)
++	val = val * 10 + *p - '0';
++      kernel |= (val & 255) << shift;
++      if (*p++ != '.')
++	break;
++    }
++
++  if (kernel >= 0x060200 || kernel == 0x050e00)
++    return true;
++  if (kernel >= 0x040f00)
++    return false;
++  return true;
++}
++
++#endif
++
+ static inline void
+ init_cpu_features (struct cpu_features *cpu_features)
+ {
+@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
+   /* Check if SVE is supported.  */
+   cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
+ 
++  cpu_features->prefer_sve_ifuncs = cpu_features->sve;
++
++#if __LINUX_KERNEL_VERSION < 0x060200
++  if (cpu_features->sve)
++    cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
++#endif
++
+   /* Check if MOPS is supported.  */
+   cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
+ }
+
+commit 9883f4304cfb1558d0f1e6d9f48c4ab0a35355fe
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Wed Feb 28 09:51:14 2024 -0800
+
+    x86-64: Don't use SSE resolvers for ISA level 3 or above
+    
+    When glibc is built with ISA level 3 or above enabled, SSE resolvers
+    aren't available and glibc fails to build:
+    
+    ld: .../elf/librtld.os: in function `init_cpu_features':
+    .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
+    ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
+    /usr/local/bin/ld: final link failed: bad value
+    
+    For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
+    _dl_tlsdesc_dynamic_fxsave.
+    
+    This fixes BZ #31429.
+    Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
+    
+    (cherry picked from commit befe2d3c4dec8be2cdd01a47132e47bdb7020922)
+
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index 6fe1b728c6..b8abe733ab 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -18,6 +18,7 @@
+ 
+ #include <dl-hwcap.h>
+ #include <libc-pointer-arith.h>
++#include <isa-level.h>
+ #include <get-isa-level.h>
+ #include <cacheinfo.h>
+ #include <dl-cacheinfo.h>
+@@ -1198,7 +1199,9 @@ no_cpuid:
+ 	       TUNABLE_CALLBACK (set_x86_shstk));
+ #endif
+ 
++#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+   if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
++#endif
+     {
+       if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
+ 	{
+@@ -1219,22 +1222,24 @@ no_cpuid:
+ #endif
+ 	}
+     }
++#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+   else
+     {
+-#ifdef __x86_64__
++# ifdef __x86_64__
+       GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
+-# ifdef SHARED
++#  ifdef SHARED
+       GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+-# endif
+-#else
+-# ifdef SHARED
++#  endif
++# else
++#  ifdef SHARED
+       if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
+ 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+       else
+ 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
++#  endif
+ # endif
+-#endif
+     }
++#endif
+ 
+ #ifdef SHARED
+ # ifdef __x86_64__
+diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
+index ea69f5223a..057a10862a 100644
+--- a/sysdeps/x86_64/dl-tlsdesc.S
++++ b/sysdeps/x86_64/dl-tlsdesc.S
+@@ -20,6 +20,7 @@
+ #include <tls.h>
+ #include <cpu-features-offsets.h>
+ #include <features-offsets.h>
++#include <isa-level.h>
+ #include "tlsdesc.h"
+ #include "dl-trampoline-save.h"
+ 
+@@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak:
+ 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+ 
+ #ifdef SHARED
+-# define USE_FXSAVE
+-# define STATE_SAVE_ALIGNMENT	16
+-# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
+-# include "dl-tlsdesc-dynamic.h"
+-# undef _dl_tlsdesc_dynamic
+-# undef USE_FXSAVE
++# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
++#  define USE_FXSAVE
++#  define STATE_SAVE_ALIGNMENT	16
++#  define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
++#  include "dl-tlsdesc-dynamic.h"
++#  undef _dl_tlsdesc_dynamic
++#  undef USE_FXSAVE
++# endif
+ 
+ # define USE_XSAVE
+ # define STATE_SAVE_ALIGNMENT	64
+
+commit 7b92f46f04c6cbce19d19ae1099628431858996c
+Author: Sunil K Pandey <skpgkp2@gmail.com>
+Date:   Thu Feb 29 17:57:02 2024 -0800
+
+    x86-64: Simplify minimum ISA check ifdef conditional with if
+    
+    Replace the minimum ISA check #ifdef conditional with an if.  Since
+    MINIMUM_X86_ISA_LEVEL and AVX_X86_ISA_LEVEL are compile-time constants,
+    the compiler will perform constant folding and produce the same
+    results.
+    
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    (cherry picked from commit b6e3898194bbae78910bbe9cd086937014961e45)
+
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index b8abe733ab..3d7c2819d7 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -1199,9 +1199,8 @@ no_cpuid:
+ 	       TUNABLE_CALLBACK (set_x86_shstk));
+ #endif
+ 
+-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+-  if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+-#endif
++  if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
++      || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
+     {
+       if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
+ 	{
+@@ -1222,24 +1221,22 @@ no_cpuid:
+ #endif
+ 	}
+     }
+-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+   else
+     {
+-# ifdef __x86_64__
++#ifdef __x86_64__
+       GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
+-#  ifdef SHARED
++# ifdef SHARED
+       GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+-#  endif
+-# else
+-#  ifdef SHARED
++# endif
++#else
++# ifdef SHARED
+       if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
+ 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+       else
+ 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
+-#  endif
+ # endif
+-    }
+ #endif
++    }
+ 
+ #ifdef SHARED
+ # ifdef __x86_64__
+
+commit edb9a76e3008725e9dc035d38a58e849a3bde0f1
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Sun Apr 14 08:24:51 2024 +0200
+
+    powerpc: Fix ld.so address determination for PCREL mode (bug 31640)
+    
+    This seems to have stopped working with some GCC 14 versions,
+    which clobber r2.  With other compilers, the kernel-provided
+    r2 value is still available at this point.
+    
+    Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
+    (cherry picked from commit 14e56bd4ce15ac2d1cc43f762eb2e6b83fec1afe)
+
+diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
+index c6682f3445..2b6f5d2b08 100644
+--- a/sysdeps/powerpc/powerpc64/dl-machine.h
++++ b/sysdeps/powerpc/powerpc64/dl-machine.h
+@@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr)
+ static inline Elf64_Addr
+ elf_machine_load_address (void) __attribute__ ((const));
+ 
++#ifndef __PCREL__
+ static inline Elf64_Addr
+ elf_machine_load_address (void)
+ {
+@@ -105,6 +106,24 @@ elf_machine_dynamic (void)
+   /* Then subtract off the load address offset.  */
+   return runtime_dynamic - elf_machine_load_address() ;
+ }
++#else /* __PCREL__ */
++/* In PCREL mode, r2 may have been clobbered.  Rely on relative
++   relocations instead.  */
++
++static inline ElfW(Addr)
++elf_machine_load_address (void)
++{
++  extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
++  return (ElfW(Addr)) &__ehdr_start;
++}
++
++static inline ElfW(Addr)
++elf_machine_dynamic (void)
++{
++  extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
++  return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
++}
++#endif /* __PCREL__ */
+ 
+ /* The PLT uses Elf64_Rela relocs.  */
+ #define elf_machine_relplt elf_machine_rela
+
+commit 04df8652eb1919da18d54b3dcd6db1675993d45d
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Thu Feb 15 11:19:56 2024 -0800
+
+    Apply the Makefile sorting fix
+    
+    Apply the Makefile sorting fix generated by sort-makefile-lines.py.
+    
+    (cherry picked from commit ef7f4b1fef67430a8f3cfc77fa6aada2add851d7)
+
+diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
+index fe863e1ba4..01762ef526 100644
+--- a/sysdeps/loongarch/lp64/multiarch/Makefile
++++ b/sysdeps/loongarch/lp64/multiarch/Makefile
+@@ -1,52 +1,52 @@
+ ifeq ($(subdir),string)
+ sysdep_routines += \
+-  strlen-aligned \
+-  strlen-lsx \
+-  strlen-lasx \
+-  strnlen-aligned \
+-  strnlen-lsx \
+-  strnlen-lasx \
++  memchr-aligned \
++  memchr-lasx \
++  memchr-lsx \
++  memcmp-aligned \
++  memcmp-lasx \
++  memcmp-lsx \
++  memcpy-aligned \
++  memcpy-unaligned \
++  memmove-lasx \
++  memmove-lsx \
++  memmove-unaligned \
++  memrchr-generic \
++  memrchr-lasx \
++  memrchr-lsx \
++  memset-aligned \
++  memset-lasx \
++  memset-lsx \
++  memset-unaligned \
++  rawmemchr-aligned \
++  rawmemchr-lasx \
++  rawmemchr-lsx \
++  stpcpy-aligned \
++  stpcpy-lasx \
++  stpcpy-lsx \
++  stpcpy-unaligned \
+   strchr-aligned \
+-  strchr-lsx \
+   strchr-lasx \
+-  strrchr-aligned \
+-  strrchr-lsx \
+-  strrchr-lasx \
++  strchr-lsx \
+   strchrnul-aligned \
+-  strchrnul-lsx \
+   strchrnul-lasx \
++  strchrnul-lsx \
+   strcmp-aligned \
+   strcmp-lsx \
+-  strncmp-aligned \
+-  strncmp-lsx \
+   strcpy-aligned \
+-  strcpy-unaligned \
+-  strcpy-lsx \
+   strcpy-lasx \
+-  stpcpy-aligned \
+-  stpcpy-unaligned \
+-  stpcpy-lsx \
+-  stpcpy-lasx \
+-  memcpy-aligned \
+-  memcpy-unaligned \
+-  memmove-unaligned \
+-  memmove-lsx \
+-  memmove-lasx \
+-  rawmemchr-aligned \
+-  rawmemchr-lsx \
+-  rawmemchr-lasx \
+-  memchr-aligned \
+-  memchr-lsx \
+-  memchr-lasx \
+-  memrchr-generic \
+-  memrchr-lsx \
+-  memrchr-lasx \
+-  memset-aligned \
+-  memset-unaligned \
+-  memset-lsx \
+-  memset-lasx \
+-  memcmp-aligned \
+-  memcmp-lsx \
+-  memcmp-lasx \
++  strcpy-lsx \
++  strcpy-unaligned \
++  strlen-aligned \
++  strlen-lasx \
++  strlen-lsx \
++  strncmp-aligned \
++  strncmp-lsx \
++  strnlen-aligned \
++  strnlen-lasx \
++  strnlen-lsx \
++  strrchr-aligned \
++  strrchr-lasx \
++  strrchr-lsx \
+ # sysdep_routines
+ endif
+diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
+index 992aabe43e..5311b594af 100644
+--- a/sysdeps/x86/Makefile
++++ b/sysdeps/x86/Makefile
+@@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags)
+ CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector)
+ 
+ tests += \
+-  tst-get-cpu-features \
+-  tst-get-cpu-features-static \
+   tst-cpu-features-cpuinfo \
+   tst-cpu-features-cpuinfo-static \
+   tst-cpu-features-supports \
+   tst-cpu-features-supports-static \
++  tst-get-cpu-features \
++  tst-get-cpu-features-static \
+   tst-hwcap-tunables \
+ # tests
+ tests-static += \
+-  tst-get-cpu-features-static \
+   tst-cpu-features-cpuinfo-static \
+   tst-cpu-features-supports-static \
++  tst-get-cpu-features-static \
+ # tests-static
+ ifeq (yes,$(have-ifunc))
+ ifeq (yes,$(have-gcc-ifunc))
+diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
+index 9d374a3299..0ede447405 100644
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -252,6 +252,10 @@ sysdep-dl-routines += dl-cet
+ 
+ tests += \
+   tst-cet-legacy-1 \
++  tst-cet-legacy-10 \
++  tst-cet-legacy-10-static \
++  tst-cet-legacy-10a \
++  tst-cet-legacy-10a-static \
+   tst-cet-legacy-1a \
+   tst-cet-legacy-2 \
+   tst-cet-legacy-2a \
+@@ -263,15 +267,11 @@ tests += \
+   tst-cet-legacy-8 \
+   tst-cet-legacy-9 \
+   tst-cet-legacy-9-static \
+-  tst-cet-legacy-10 \
+-  tst-cet-legacy-10-static \
+-  tst-cet-legacy-10a \
+-  tst-cet-legacy-10a-static \
+ # tests
+ tests-static += \
+-  tst-cet-legacy-9-static \
+   tst-cet-legacy-10-static \
+   tst-cet-legacy-10a-static \
++  tst-cet-legacy-9-static \
+ # tests-static
+ tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd)
+ 
+diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
+index ea81753b70..e1a490dd98 100644
+--- a/sysdeps/x86_64/fpu/multiarch/Makefile
++++ b/sysdeps/x86_64/fpu/multiarch/Makefile
+@@ -4,10 +4,10 @@ libm-sysdep_routines += \
+   s_ceilf-c \
+   s_floor-c \
+   s_floorf-c \
+-  s_rint-c \
+-  s_rintf-c \
+   s_nearbyint-c \
+   s_nearbyintf-c \
++  s_rint-c \
++  s_rintf-c \
+   s_roundeven-c \
+   s_roundevenf-c \
+   s_trunc-c \
+@@ -21,10 +21,10 @@ libm-sysdep_routines += \
+   s_floorf-sse4_1 \
+   s_nearbyint-sse4_1 \
+   s_nearbyintf-sse4_1 \
+-  s_roundeven-sse4_1 \
+-  s_roundevenf-sse4_1 \
+   s_rint-sse4_1 \
+   s_rintf-sse4_1 \
++  s_roundeven-sse4_1 \
++  s_roundevenf-sse4_1 \
+   s_trunc-sse4_1 \
+   s_truncf-sse4_1 \
+ # libm-sysdep_routines
+@@ -84,12 +84,12 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2
+ CFLAGS-s_sincosf-fma.c = -mfma -mavx2
+ 
+ libm-sysdep_routines += \
++  e_asin-fma4 \
++  e_atan2-fma4 \
+   e_exp-fma4 \
+   e_log-fma4 \
+   e_pow-fma4 \
+-  e_asin-fma4 \
+   s_atan-fma4 \
+-  e_atan2-fma4 \
+   s_sin-fma4 \
+   s_sincos-fma4 \
+   s_tan-fma4 \
+@@ -106,10 +106,10 @@ CFLAGS-s_tan-fma4.c = -mfma4
+ CFLAGS-s_sincos-fma4.c = -mfma4
+ 
+ libm-sysdep_routines += \
++  e_atan2-avx \
+   e_exp-avx \
+   e_log-avx \
+   s_atan-avx \
+-  e_atan2-avx \
+   s_sin-avx \
+   s_sincos-avx \
+   s_tan-avx \
+diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
+index e1e894c963..d3d2270394 100644
+--- a/sysdeps/x86_64/multiarch/Makefile
++++ b/sysdeps/x86_64/multiarch/Makefile
+@@ -4,8 +4,8 @@ sysdep_routines += \
+   memchr-avx2 \
+   memchr-avx2-rtm \
+   memchr-evex \
+-  memchr-evex512 \
+   memchr-evex-rtm \
++  memchr-evex512 \
+   memchr-sse2 \
+   memcmp-avx2-movbe \
+   memcmp-avx2-movbe-rtm \
+@@ -37,8 +37,8 @@ sysdep_routines += \
+   rawmemchr-avx2 \
+   rawmemchr-avx2-rtm \
+   rawmemchr-evex \
+-  rawmemchr-evex512 \
+   rawmemchr-evex-rtm \
++  rawmemchr-evex512 \
+   rawmemchr-sse2 \
+   stpcpy-avx2 \
+   stpcpy-avx2-rtm \
+
+commit 423099a03264ea28298f47355d7811b8efe03c97
+Author: Sunil K Pandey <skpgkp2@gmail.com>
+Date:   Tue Feb 13 12:23:14 2024 -0800
+
+    x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch
+    
+    When glibc is built with ISA level 3 or higher by default, the resulting
+    glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
+    FMA4 variants in libm multiarch when ISA level 3 or higher is enabled by
+    default.
+    
+    When glibc is built with ISA level 2 enabled by default, only keep the
+    SSE4.1 variant.
+    
+    Fixes BZ 31335.
+    
+    NB: elf/tst-valgrind-smoke test fails with ISA level 4, because valgrind
+    doesn't support AVX512 instructions:
+    
+    https://bugs.kde.org/show_bug.cgi?id=383010
+    
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    (cherry picked from commit 9f78a7c1d0963282608da836b840f0d5ae1c478e)
+
+diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
+index 1f4c2d67fd..2a5421bb31 100644
+--- a/sysdeps/x86/configure
++++ b/sysdeps/x86/configure
+@@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; }
+   if test $libc_cv_have_x86_lahf_sahf = yes; then
+     printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h
+ 
++    ISAFLAG="-DHAVE_X86_LAHF_SAHF"
+   fi
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5
+ printf %s "checking for MOVBE instruction support... " >&6; }
+@@ -120,9 +121,41 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; }
+   if test $libc_cv_have_x86_movbe = yes; then
+     printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h
+ 
++    ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
+   fi
++
++  # Check for ISA level support.
++  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5
++printf %s "checking for ISA level support... " >&6; }
++if test ${libc_cv_have_x86_isa_level+y}
++then :
++  printf %s "(cached) " >&6
++else $as_nop
++  cat > conftest.c <<EOF
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL >= 4
++libc_cv_have_x86_isa_level=4
++#elif MINIMUM_X86_ISA_LEVEL == 3
++libc_cv_have_x86_isa_level=3
++#elif MINIMUM_X86_ISA_LEVEL == 2
++libc_cv_have_x86_isa_level=2
++#else
++libc_cv_have_x86_isa_level=baseline
++#endif
++EOF
++		 eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
++		 rm -rf conftest*
++fi
++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5
++printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; }
++else
++  libc_cv_have_x86_isa_level=baseline
+ fi
+ config_vars="$config_vars
++have-x86-isa-level = $libc_cv_have_x86_isa_level"
++config_vars="$config_vars
++x86-isa-level-3-or-above = 3 4"
++config_vars="$config_vars
+ enable-x86-isa-level = $libc_cv_include_x86_isa_level"
+ 
+ printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
+diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
+index 437a50623b..78ff7c8f41 100644
+--- a/sysdeps/x86/configure.ac
++++ b/sysdeps/x86/configure.ac
+@@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then
+     fi])
+   if test $libc_cv_have_x86_lahf_sahf = yes; then
+     AC_DEFINE(HAVE_X86_LAHF_SAHF)
++    ISAFLAG="-DHAVE_X86_LAHF_SAHF"
+   fi
+   AC_CACHE_CHECK([for MOVBE instruction support],
+ 		 libc_cv_have_x86_movbe, [dnl
+@@ -81,8 +82,31 @@ if test $libc_cv_include_x86_isa_level = yes; then
+     fi])
+   if test $libc_cv_have_x86_movbe = yes; then
+     AC_DEFINE(HAVE_X86_MOVBE)
++    ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
+   fi
++
++  # Check for ISA level support.
++  AC_CACHE_CHECK([for ISA level support],
++		 libc_cv_have_x86_isa_level, [dnl
++cat > conftest.c <<EOF
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL >= 4
++libc_cv_have_x86_isa_level=4
++#elif MINIMUM_X86_ISA_LEVEL == 3
++libc_cv_have_x86_isa_level=3
++#elif MINIMUM_X86_ISA_LEVEL == 2
++libc_cv_have_x86_isa_level=2
++#else
++libc_cv_have_x86_isa_level=baseline
++#endif
++EOF
++		 eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
++		 rm -rf conftest*])
++else
++  libc_cv_have_x86_isa_level=baseline
+ fi
++LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
++LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
+ LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
+ 
+ dnl Static PIE is supported.
+diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
+index e1a490dd98..6ddd50240c 100644
+--- a/sysdeps/x86_64/fpu/multiarch/Makefile
++++ b/sysdeps/x86_64/fpu/multiarch/Makefile
+@@ -1,49 +1,4 @@
+ ifeq ($(subdir),math)
+-libm-sysdep_routines += \
+-  s_ceil-c \
+-  s_ceilf-c \
+-  s_floor-c \
+-  s_floorf-c \
+-  s_nearbyint-c \
+-  s_nearbyintf-c \
+-  s_rint-c \
+-  s_rintf-c \
+-  s_roundeven-c \
+-  s_roundevenf-c \
+-  s_trunc-c \
+-  s_truncf-c \
+-# libm-sysdep_routines
+-
+-libm-sysdep_routines += \
+-  s_ceil-sse4_1 \
+-  s_ceilf-sse4_1 \
+-  s_floor-sse4_1 \
+-  s_floorf-sse4_1 \
+-  s_nearbyint-sse4_1 \
+-  s_nearbyintf-sse4_1 \
+-  s_rint-sse4_1 \
+-  s_rintf-sse4_1 \
+-  s_roundeven-sse4_1 \
+-  s_roundevenf-sse4_1 \
+-  s_trunc-sse4_1 \
+-  s_truncf-sse4_1 \
+-# libm-sysdep_routines
+-
+-libm-sysdep_routines += \
+-  e_asin-fma \
+-  e_atan2-fma \
+-  e_exp-fma \
+-  e_log-fma \
+-  e_log2-fma \
+-  e_pow-fma \
+-  s_atan-fma \
+-  s_expm1-fma \
+-  s_log1p-fma \
+-  s_sin-fma \
+-  s_sincos-fma \
+-  s_tan-fma \
+-# libm-sysdep_routines
+-
+ CFLAGS-e_asin-fma.c = -mfma -mavx2
+ CFLAGS-e_atan2-fma.c = -mfma -mavx2
+ CFLAGS-e_exp-fma.c = -mfma -mavx2
+@@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
+ CFLAGS-s_tan-fma.c = -mfma -mavx2
+ CFLAGS-s_sincos-fma.c = -mfma -mavx2
+ 
+-libm-sysdep_routines += \
+-  s_cosf-sse2 \
+-  s_sincosf-sse2 \
+-  s_sinf-sse2 \
+-# libm-sysdep_routines
+-
+-libm-sysdep_routines += \
+-  e_exp2f-fma \
+-  e_expf-fma \
+-  e_log2f-fma \
+-  e_logf-fma \
+-  e_powf-fma \
+-  s_cosf-fma \
+-  s_sincosf-fma \
+-  s_sinf-fma \
+-# libm-sysdep_routines
+-
+ CFLAGS-e_exp2f-fma.c = -mfma -mavx2
+ CFLAGS-e_expf-fma.c = -mfma -mavx2
+ CFLAGS-e_log2f-fma.c = -mfma -mavx2
+@@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
+ CFLAGS-s_cosf-fma.c = -mfma -mavx2
+ CFLAGS-s_sincosf-fma.c = -mfma -mavx2
+ 
++# Check if ISA level is 3 or above.
++ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
+ libm-sysdep_routines += \
++  s_ceil-avx \
++  s_ceilf-avx \
++  s_floor-avx \
++  s_floorf-avx \
++  s_nearbyint-avx \
++  s_nearbyintf-avx \
++  s_rint-avx \
++  s_rintf-avx \
++  s_roundeven-avx \
++  s_roundevenf-avx \
++  s_trunc-avx \
++  s_truncf-avx \
++# libm-sysdep_routines
++else
++libm-sysdep_routines += \
++  e_asin-fma \
+   e_asin-fma4 \
++  e_atan2-avx \
++  e_atan2-fma \
+   e_atan2-fma4 \
++  e_exp-avx \
++  e_exp-fma \
+   e_exp-fma4 \
++  e_exp2f-fma \
++  e_expf-fma \
++  e_log-avx \
++  e_log-fma \
+   e_log-fma4 \
++  e_log2-fma \
++  e_log2f-fma \
++  e_logf-fma \
++  e_pow-fma \
+   e_pow-fma4 \
++  e_powf-fma \
++  s_atan-avx \
++  s_atan-fma \
+   s_atan-fma4 \
++  s_ceil-sse4_1 \
++  s_ceilf-sse4_1 \
++  s_cosf-fma \
++  s_cosf-sse2 \
++  s_expm1-fma \
++  s_floor-sse4_1 \
++  s_floorf-sse4_1 \
++  s_log1p-fma \
++  s_nearbyint-sse4_1 \
++  s_nearbyintf-sse4_1 \
++  s_rint-sse4_1 \
++  s_rintf-sse4_1 \
++  s_roundeven-sse4_1 \
++  s_roundevenf-sse4_1 \
++  s_sin-avx \
++  s_sin-fma \
+   s_sin-fma4 \
++  s_sincos-avx \
++  s_sincos-fma \
+   s_sincos-fma4 \
++  s_sincosf-fma \
++  s_sincosf-sse2 \
++  s_sinf-fma \
++  s_sinf-sse2 \
++  s_tan-avx \
++  s_tan-fma \
+   s_tan-fma4 \
++  s_trunc-sse4_1 \
++  s_truncf-sse4_1 \
+ # libm-sysdep_routines
++ifeq ($(have-x86-isa-level),baseline)
++libm-sysdep_routines += \
++  s_ceil-c \
++  s_ceilf-c \
++  s_floor-c \
++  s_floorf-c \
++  s_nearbyint-c \
++  s_nearbyintf-c \
++  s_rint-c \
++  s_rintf-c \
++  s_roundeven-c \
++  s_roundevenf-c \
++  s_trunc-c \
++  s_truncf-c \
++# libm-sysdep_routines
++endif
++endif
+ 
+ CFLAGS-e_asin-fma4.c = -mfma4
+ CFLAGS-e_atan2-fma4.c = -mfma4
+@@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
+ CFLAGS-s_tan-fma4.c = -mfma4
+ CFLAGS-s_sincos-fma4.c = -mfma4
+ 
+-libm-sysdep_routines += \
+-  e_atan2-avx \
+-  e_exp-avx \
+-  e_log-avx \
+-  s_atan-avx \
+-  s_sin-avx \
+-  s_sincos-avx \
+-  s_tan-avx \
+-# libm-sysdep_routines
+-
+ CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
+ CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
+ CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
+index 2eaa6c2c04..d64fca2586 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
+@@ -16,26 +16,29 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_ieee754_asin (double);
+ extern double __redirect_ieee754_acos (double);
+ 
+-#define SYMBOL_NAME ieee754_asin
+-#include "ifunc-fma4.h"
++# define SYMBOL_NAME ieee754_asin
++# include "ifunc-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_asin, __asin)
+ 
+-#undef SYMBOL_NAME
+-#define SYMBOL_NAME ieee754_acos
+-#include "ifunc-fma4.h"
++# undef SYMBOL_NAME
++# define SYMBOL_NAME ieee754_acos
++# include "ifunc-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_acos, __acos)
+ 
+-#define __ieee754_acos __ieee754_acos_sse2
+-#define __ieee754_asin __ieee754_asin_sse2
++# define __ieee754_acos __ieee754_acos_sse2
++# define __ieee754_asin __ieee754_asin_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_asin.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+index 17ee4f3c36..8a86c14ded 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+@@ -16,16 +16,19 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_ieee754_atan2 (double, double);
+ 
+-#define SYMBOL_NAME ieee754_atan2
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME ieee754_atan2
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_atan2,
+ 		       __ieee754_atan2, IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_atan2, __atan2)
+ 
+-#define __ieee754_atan2 __ieee754_atan2_sse2
++# define __ieee754_atan2 __ieee754_atan2_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_atan2.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
+index 406b7ebd44..d56329291a 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <math.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <math.h>
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_ieee754_exp (double);
+ 
+-#define SYMBOL_NAME ieee754_exp
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME ieee754_exp
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_exp, __exp)
+ 
+-#define __exp __ieee754_exp_sse2
++# define __exp __ieee754_exp_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_exp.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
+index 804fd6be85..06fe5028d6 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
+@@ -16,25 +16,28 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# include <libm-alias-finite.h>
+ 
+ extern float __redirect_exp2f (float);
+ 
+-#define SYMBOL_NAME exp2f
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME exp2f
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
+ libm_alias_float_other (__exp2, exp2)
+-#else
++# else
+ libm_alias_float (__exp2, exp2)
+-#endif
++# endif
+ 
+ strong_alias (__exp2f, __ieee754_exp2f)
+ libm_alias_finite (__exp2f, __exp2f)
+ 
+-#define __exp2f __exp2f_sse2
++# define __exp2f __exp2f_sse2
++#endif
+ #include <sysdeps/ieee754/flt-32/e_exp2f.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
+index 4a7e2a5bce..19d767f636 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
+@@ -16,28 +16,31 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# include <libm-alias-finite.h>
+ 
+ extern float __redirect_expf (float);
+ 
+-#define SYMBOL_NAME expf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME expf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
+   __attribute__ ((visibility ("hidden")));
+ 
+ versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
+ libm_alias_float_other (__exp, exp)
+-#else
++# else
+ libm_alias_float (__exp, exp)
+-#endif
++# endif
+ 
+ strong_alias (__expf, __ieee754_expf)
+ libm_alias_finite (__expf, __expf)
+ 
+-#define __expf __expf_sse2
++# define __expf __expf_sse2
++#endif
+ #include <sysdeps/ieee754/flt-32/e_expf.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
+index 067fbf58c3..d80c1b1463 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <math.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <math.h>
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_ieee754_log (double);
+ 
+-#define SYMBOL_NAME ieee754_log
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME ieee754_log
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_log, __log)
+ 
+-#define __log __ieee754_log_sse2
++# define __log __ieee754_log_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_log.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
+index 9c57a2f6cc..9686782c09 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
+@@ -16,28 +16,31 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_log2 (double);
+ 
+-#define SYMBOL_NAME log2
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME log2
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
+   __attribute__ ((visibility ("hidden")));
+ 
+ versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
+ libm_alias_double_other (__log2, log2)
+-#else
++# else
+ libm_alias_double (__log2, log2)
+-#endif
++# endif
+ 
+ strong_alias (__log2, __ieee754_log2)
+ libm_alias_finite (__log2, __log2)
+ 
+-#define __log2 __log2_sse2
++# define __log2 __log2_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_log2.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
+index 2b45c87f38..8ada46e11e 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
+@@ -16,28 +16,31 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# include <libm-alias-finite.h>
+ 
+ extern float __redirect_log2f (float);
+ 
+-#define SYMBOL_NAME log2f
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME log2f
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
+   __attribute__ ((visibility ("hidden")));
+ 
+ versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
+ libm_alias_float_other (__log2, log2)
+-#else
++# else
+ libm_alias_float (__log2, log2)
+-#endif
++# endif
+ 
+ strong_alias (__log2f, __ieee754_log2f)
+ libm_alias_finite (__log2f, __log2f)
+ 
+-#define __log2f __log2f_sse2
++# define __log2f __log2f_sse2
++#endif
+ #include <sysdeps/ieee754/flt-32/e_log2f.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
+index 97e23c8fea..a3978d9a8e 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
+@@ -16,28 +16,31 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# include <libm-alias-finite.h>
+ 
+ extern float __redirect_logf (float);
+ 
+-#define SYMBOL_NAME logf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME logf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
+   __attribute__ ((visibility ("hidden")));
+ 
+ versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
+ libm_alias_float_other (__log, log)
+-#else
++# else
+ libm_alias_float (__log, log)
+-#endif
++# endif
+ 
+ strong_alias (__logf, __ieee754_logf)
+ libm_alias_finite (__logf, __logf)
+ 
+-#define __logf __logf_sse2
++# define __logf __logf_sse2
++#endif
+ #include <sysdeps/ieee754/flt-32/e_logf.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
+index 42618e7112..f8f17aff9f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <math.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <math.h>
++# include <libm-alias-finite.h>
+ 
+ extern double __redirect_ieee754_pow (double, double);
+ 
+-#define SYMBOL_NAME ieee754_pow
+-#include "ifunc-fma4.h"
++# define SYMBOL_NAME ieee754_pow
++# include "ifunc-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_ieee754_pow,
+ 		       __ieee754_pow, IFUNC_SELECTOR ());
+ libm_alias_finite (__ieee754_pow, __pow)
+ 
+-#define __pow __ieee754_pow_sse2
++# define __pow __ieee754_pow_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/e_pow.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
+index 8e6ce13cc1..8b1a4c7d04 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
+@@ -16,31 +16,34 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
+-#include <libm-alias-finite.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# include <libm-alias-finite.h>
+ 
+-#define powf __redirect_powf
+-#define __DECL_SIMD___redirect_powf
+-#include <math.h>
+-#undef powf
++# define powf __redirect_powf
++# define __DECL_SIMD___redirect_powf
++# include <math.h>
++# undef powf
+ 
+-#define SYMBOL_NAME powf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME powf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+ 
+-#ifdef SHARED
++# ifdef SHARED
+ __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
+   __attribute__ ((visibility ("hidden")));
+ 
+ versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
+ libm_alias_float_other (__pow, pow)
+-#else
++# else
+ libm_alias_float (__pow, pow)
+-#endif
++# endif
+ 
+ strong_alias (__powf, __ieee754_powf)
+ libm_alias_finite (__powf, __powf)
+ 
+-#define __powf __powf_sse2
++# define __powf __powf_sse2
++#endif
+ #include <sysdeps/ieee754/flt-32/e_powf.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
+index 71bad096a9..4d2c6ce006 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
+@@ -16,15 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern double __redirect_atan (double);
+ 
+-#define SYMBOL_NAME atan
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME atan
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
+ libm_alias_double (__atan, atan)
+ 
+-#define __atan __atan_sse2
++# define __atan __atan_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_atan.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
+new file mode 100644
+index 0000000000..e6c1106753
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of ceil function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__ceil)
++	vroundsd $10, %xmm0, %xmm0, %xmm0
++	ret
++END(__ceil)
++
++libm_alias_double (__ceil, ceil)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
+index 64119011ad..dba756c38f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __ceil_sse41 __ceil
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__ceil_sse41)
+ 	roundsd	$10, %xmm0, %xmm0
+ 	ret
+ END(__ceil_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__ceil, ceil)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
+index cc028addee..46c8e91e19 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-double.h>
+ 
+-#define ceil __redirect_ceil
+-#define __ceil __redirect___ceil
+-#include <math.h>
+-#undef ceil
+-#undef __ceil
++# define ceil __redirect_ceil
++# define __ceil __redirect___ceil
++# include <math.h>
++# undef ceil
++# undef __ceil
+ 
+-#define SYMBOL_NAME ceil
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME ceil
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
+ libm_alias_double (__ceil, ceil)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
+new file mode 100644
+index 0000000000..b4d8ac0455
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of ceilf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__ceilf)
++	vroundss $10, %xmm0, %xmm0, %xmm0
++	ret
++END(__ceilf)
++
++libm_alias_float (__ceil, ceil)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
+index dd9a9f6b71..9abc87b91a 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __ceilf_sse41 __ceilf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__ceilf_sse41)
+ 	roundss	$10, %xmm0, %xmm0
+ 	ret
+ END(__ceilf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__ceil, ceil)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
+index 97a0ca7d19..bb53108f73 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-float.h>
+ 
+-#define ceilf __redirect_ceilf
+-#define __ceilf __redirect___ceilf
+-#include <math.h>
+-#undef ceilf
+-#undef __ceilf
++# define ceilf __redirect_ceilf
++# define __ceilf __redirect___ceilf
++# include <math.h>
++# undef ceilf
++# undef __ceilf
+ 
+-#define SYMBOL_NAME ceilf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME ceilf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
+ libm_alias_float (__ceil, ceil)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
+index 2703c576df..8a02e04538 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
+@@ -16,13 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
+ 
+ extern float __redirect_cosf (float);
+ 
+-#define SYMBOL_NAME cosf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME cosf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
+ 
+ libm_alias_float (__cos, cos)
++#else
++# include <sysdeps/ieee754/flt-32/s_cosf.c>
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
+index 8a2d69f9b2..d58ef3d8f5 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
+@@ -16,21 +16,24 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern double __redirect_expm1 (double);
+ 
+-#define SYMBOL_NAME expm1
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME expm1
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
+ libm_alias_double (__expm1, expm1)
+ 
+-#define __expm1 __expm1_sse2
++# define __expm1 __expm1_sse2
+ 
+ /* NB: __expm1 may be expanded to __expm1_sse2 in the following
+    prototypes.  */
+ extern long double __expm1l (long double);
+ extern long double __expm1f128 (long double);
+ 
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_expm1.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
+new file mode 100644
+index 0000000000..ff74b5a8bf
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of floor function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__floor)
++	vroundsd $9, %xmm0, %xmm0, %xmm0
++	ret
++END(__floor)
++
++libm_alias_double (__floor, floor)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
+index 2f7521f39f..c9b9b0639b 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __floor_sse41 __floor
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__floor_sse41)
+ 	roundsd	$9, %xmm0, %xmm0
+ 	ret
+ END(__floor_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__floor, floor)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
+index 8cebd48e10..2c87dd0056 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-double.h>
+ 
+-#define floor __redirect_floor
+-#define __floor __redirect___floor
+-#include <math.h>
+-#undef floor
+-#undef __floor
++# define floor __redirect_floor
++# define __floor __redirect___floor
++# include <math.h>
++# undef floor
++# undef __floor
+ 
+-#define SYMBOL_NAME floor
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME floor
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
+ libm_alias_double (__floor, floor)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
+new file mode 100644
+index 0000000000..c378baae8e
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of floorf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__floorf)
++	vroundss $9, %xmm0, %xmm0, %xmm0
++	ret
++END(__floorf)
++
++libm_alias_float (__floor, floor)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
+index 5f6020d27d..c2216899db 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __floorf_sse41 __floorf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__floorf_sse41)
+ 	roundss	$9, %xmm0, %xmm0
+ 	ret
+ END(__floorf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__floor, floor)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
+index a14e18b03c..a277802b6d 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-float.h>
+ 
+-#define floorf __redirect_floorf
+-#define __floorf __redirect___floorf
+-#include <math.h>
+-#undef floorf
+-#undef __floorf
++# define floorf __redirect_floorf
++# define __floorf __redirect___floorf
++# include <math.h>
++# undef floorf
++# undef __floorf
+ 
+-#define SYMBOL_NAME floorf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME floorf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
+ libm_alias_float (__floor, floor)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
+index a8e1a3f21b..3fa1185d81 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
+@@ -16,14 +16,17 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern double __redirect_log1p (double);
+ 
+-#define SYMBOL_NAME log1p
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME log1p
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
+ 
+-#define __log1p __log1p_sse2
++# define __log1p __log1p_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_log1p.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
+new file mode 100644
+index 0000000000..5bfdf73c28
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of nearbyint function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__nearbyint)
++	vroundsd $0xc, %xmm0, %xmm0, %xmm0
++	ret
++END(__nearbyint)
++
++libm_alias_double (__nearbyint, nearbyint)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
+index 674f7eb40a..9d84410a1f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __nearbyint_sse41 __nearbyint
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__nearbyint_sse41)
+ 	roundsd	$0xc, %xmm0, %xmm0
+ 	ret
+ END(__nearbyint_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__nearbyint, nearbyint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
+index 693e42dd4e..057a7ca60f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+-#define nearbyint __redirect_nearbyint
+-#define __nearbyint __redirect___nearbyint
+-#include <math.h>
+-#undef nearbyint
+-#undef __nearbyint
++# define nearbyint __redirect_nearbyint
++# define __nearbyint __redirect___nearbyint
++# include <math.h>
++# undef nearbyint
++# undef __nearbyint
+ 
+-#define SYMBOL_NAME nearbyint
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME nearbyint
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_double (__nearbyint, nearbyint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
+new file mode 100644
+index 0000000000..1dbaed0324
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of nearbyintf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__nearbyintf)
++	vroundss $0xc, %xmm0, %xmm0, %xmm0
++	ret
++END(__nearbyintf)
++
++libm_alias_float (__nearbyint, nearbyint)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
+index 5892bd7563..3cf35f92d6 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __nearbyintf_sse41 __nearbyintf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__nearbyintf_sse41)
+ 	roundss	$0xc, %xmm0, %xmm0
+ 	ret
+ END(__nearbyintf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__nearbyint, nearbyint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
+index a0ac009f4b..41f374ba72 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
+ 
+-#define nearbyintf __redirect_nearbyintf
+-#define __nearbyintf __redirect___nearbyintf
+-#include <math.h>
+-#undef nearbyintf
+-#undef __nearbyintf
++# define nearbyintf __redirect_nearbyintf
++# define __nearbyintf __redirect___nearbyintf
++# include <math.h>
++# undef nearbyintf
++# undef __nearbyintf
+ 
+-#define SYMBOL_NAME nearbyintf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME nearbyintf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
+ 		       IFUNC_SELECTOR ());
+ libm_alias_float (__nearbyint, nearbyint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
+new file mode 100644
+index 0000000000..2b403b331f
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of rint function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__rint)
++	vroundsd $4, %xmm0, %xmm0, %xmm0
++	ret
++END(__rint)
++
++libm_alias_double (__rint, rint)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
+index 405372991b..8cd9cf759f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __rint_sse41 __rint
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__rint_sse41)
+ 	roundsd	$4, %xmm0, %xmm0
+ 	ret
+ END(__rint_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__rint, rint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
+index 754c87e004..18623b7d99 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-double.h>
+ 
+-#define rint __redirect_rint
+-#define __rint __redirect___rint
+-#include <math.h>
+-#undef rint
+-#undef __rint
++# define rint __redirect_rint
++# define __rint __redirect___rint
++# include <math.h>
++# undef rint
++# undef __rint
+ 
+-#define SYMBOL_NAME rint
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME rint
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
+ libm_alias_double (__rint, rint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
+new file mode 100644
+index 0000000000..171c2867f4
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of rintf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__rintf)
++	vroundss $4, %xmm0, %xmm0, %xmm0
++	ret
++END(__rintf)
++
++libm_alias_float (__rint, rint)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
+index 8ac67ce767..fc1e70f0c9 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __rintf_sse41 __rintf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__rintf_sse41)
+ 	roundss	$4, %xmm0, %xmm0
+ 	ret
+ END(__rintf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__rint, rint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
+index e9d6b7a5f2..e275368dec 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-float.h>
+ 
+-#define rintf __redirect_rintf
+-#define __rintf __redirect___rintf
+-#include <math.h>
+-#undef rintf
+-#undef __rintf
++# define rintf __redirect_rintf
++# define __rintf __redirect___rintf
++# include <math.h>
++# undef rintf
++# undef __rintf
+ 
+-#define SYMBOL_NAME rintf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME rintf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
+ libm_alias_float (__rint, rint)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
+new file mode 100644
+index 0000000000..576790355c
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of roundeven function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__roundeven)
++	vroundsd $8, %xmm0, %xmm0, %xmm0
++	ret
++END(__roundeven)
++
++libm_alias_double (__roundeven, roundeven)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
+index 5ef102336b..f00be56c59 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __roundeven_sse41 __roundeven
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__roundeven_sse41)
+ 	roundsd	$8, %xmm0, %xmm0
+ 	ret
+ END(__roundeven_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__roundeven, roundeven)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
+index 8737b32e26..139aad088f 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
+@@ -16,16 +16,19 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+-#define roundeven __redirect_roundeven
+-#define __roundeven __redirect___roundeven
+-#include <math.h>
+-#undef roundeven
+-#undef __roundeven
++# define roundeven __redirect_roundeven
++# define __roundeven __redirect___roundeven
++# include <math.h>
++# undef roundeven
++# undef __roundeven
+ 
+-#define SYMBOL_NAME roundeven
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME roundeven
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
+ libm_alias_double (__roundeven, roundeven)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
+new file mode 100644
+index 0000000000..42c359f4cd
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of roundevenf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__roundevenf)
++	vroundss $8, %xmm0, %xmm0, %xmm0
++	ret
++END(__roundevenf)
++
++libm_alias_float (__roundeven, roundeven)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
+index 792c90ba07..6b148e4353 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
+@@ -17,8 +17,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __roundevenf_sse41 __roundevenf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__roundevenf_sse41)
+ 	roundss	$8, %xmm0, %xmm0
+ 	ret
+ END(__roundevenf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__roundeven, roundeven)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
+index e96016a4d5..2fb090075d 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
+@@ -16,16 +16,19 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
+ 
+-#define roundevenf __redirect_roundevenf
+-#define __roundevenf __redirect___roundevenf
+-#include <math.h>
+-#undef roundevenf
+-#undef __roundevenf
++# define roundevenf __redirect_roundevenf
++# define __roundevenf __redirect___roundevenf
++# include <math.h>
++# undef roundevenf
++# undef __roundevenf
+ 
+-#define SYMBOL_NAME roundevenf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME roundevenf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
+ libm_alias_float (__roundeven, roundeven)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
+index 355cc0092e..21e77943a3 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
+@@ -16,24 +16,27 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern double __redirect_sin (double);
+ extern double __redirect_cos (double);
+ 
+-#define SYMBOL_NAME sin
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME sin
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
+ libm_alias_double (__sin, sin)
+ 
+-#undef SYMBOL_NAME
+-#define SYMBOL_NAME cos
+-#include "ifunc-avx-fma4.h"
++# undef SYMBOL_NAME
++# define SYMBOL_NAME cos
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
+ libm_alias_double (__cos, cos)
+ 
+-#define __cos __cos_sse2
+-#define __sin __sin_sse2
++# define __cos __cos_sse2
++# define __sin __sin_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_sin.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
+index 70107e999c..b35757f8de 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
+@@ -16,15 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern void __redirect_sincos (double, double *, double *);
+ 
+-#define SYMBOL_NAME sincos
+-#include "ifunc-fma4.h"
++# define SYMBOL_NAME sincos
++# include "ifunc-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
+ libm_alias_double (__sincos, sincos)
+ 
+-#define __sincos __sincos_sse2
++# define __sincos __sincos_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_sincos.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
+index 80bc028451..0ea9b40e84 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
+@@ -16,13 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
+ 
+ extern void __redirect_sincosf (float, float *, float *);
+ 
+-#define SYMBOL_NAME sincosf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME sincosf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
+ 
+ libm_alias_float (__sincos, sincos)
++#else
++# include <sysdeps/ieee754/flt-32/s_sincosf.c>
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
+index a32b9e9550..c61624e3ee 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
+@@ -16,13 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-float.h>
+ 
+ extern float __redirect_sinf (float);
+ 
+-#define SYMBOL_NAME sinf
+-#include "ifunc-fma.h"
++# define SYMBOL_NAME sinf
++# include "ifunc-fma.h"
+ 
+ libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
+ 
+ libm_alias_float (__sin, sin)
++#else
++# include <sysdeps/ieee754/flt-32/s_sinf.c>
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
+index f9a2474a13..125d992ba1 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
+@@ -16,15 +16,18 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
++# include <libm-alias-double.h>
+ 
+ extern double __redirect_tan (double);
+ 
+-#define SYMBOL_NAME tan
+-#include "ifunc-avx-fma4.h"
++# define SYMBOL_NAME tan
++# include "ifunc-avx-fma4.h"
+ 
+ libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
+ libm_alias_double (__tan, tan)
+ 
+-#define __tan __tan_sse2
++# define __tan __tan_sse2
++#endif
+ #include <sysdeps/ieee754/dbl-64/s_tan.c>
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
+new file mode 100644
+index 0000000000..b3e87e9606
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of trunc function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-double.h>
++
++	.text
++ENTRY(__trunc)
++	vroundsd $11, %xmm0, %xmm0, %xmm0
++	ret
++END(__trunc)
++
++libm_alias_double (__trunc, trunc)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
+index b496a6ef49..2b79174eed 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
+@@ -18,8 +18,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-double.h>
++# define __trunc_sse41 __trunc
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__trunc_sse41)
+ 	roundsd	$11, %xmm0, %xmm0
+ 	ret
+ END(__trunc_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_double (__trunc, trunc)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
+index 9bc9df8744..ea89c4f85d 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-double.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-double.h>
+ 
+-#define trunc __redirect_trunc
+-#define __trunc __redirect___trunc
+-#include <math.h>
+-#undef trunc
+-#undef __trunc
++# define trunc __redirect_trunc
++# define __trunc __redirect___trunc
++# include <math.h>
++# undef trunc
++# undef __trunc
+ 
+-#define SYMBOL_NAME trunc
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME trunc
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
+ libm_alias_double (__trunc, trunc)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
+new file mode 100644
+index 0000000000..f31ac7d7f7
+--- /dev/null
++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
+@@ -0,0 +1,28 @@
++/* AVX implementation of truncf function.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++#include <libm-alias-float.h>
++
++	.text
++ENTRY(__truncf)
++	vroundss $11, %xmm0, %xmm0, %xmm0
++	ret
++END(__truncf)
++
++libm_alias_float (__trunc, trunc)
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
+index 22e9a83307..60498b2cb2 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
+@@ -18,8 +18,20 @@
+ 
+ #include <sysdep.h>
+ 
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++# include <libm-alias-float.h>
++# define __truncf_sse41 __truncf
++	.text
++#else
+ 	.section .text.sse4.1,"ax",@progbits
++#endif
++
+ ENTRY(__truncf_sse41)
+ 	roundss	$11, %xmm0, %xmm0
+ 	ret
+ END(__truncf_sse41)
++
++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
++libm_alias_float (__trunc, trunc)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
+index dae01d166a..92435ce39d 100644
+--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
+@@ -16,17 +16,20 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#define NO_MATH_REDIRECT
+-#include <libm-alias-float.h>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
++# define NO_MATH_REDIRECT
++# include <libm-alias-float.h>
+ 
+-#define truncf __redirect_truncf
+-#define __truncf __redirect___truncf
+-#include <math.h>
+-#undef truncf
+-#undef __truncf
++# define truncf __redirect_truncf
++# define __truncf __redirect___truncf
++# include <math.h>
++# undef truncf
++# undef __truncf
+ 
+-#define SYMBOL_NAME truncf
+-#include "ifunc-sse4_1.h"
++# define SYMBOL_NAME truncf
++# include "ifunc-sse4_1.h"
+ 
+ libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
+ libm_alias_float (__trunc, trunc)
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
+index 27eee98a0a..3584187e0e 100644
+--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
++++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
+@@ -1 +1,6 @@
+-#include <sysdeps/../math/w_exp.c>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
++# include <sysdeps/ieee754/dbl-64/w_exp.c>
++#else
++# include <sysdeps/../math/w_exp.c>
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
+index 9b2b018711..414ca3ca3d 100644
+--- a/sysdeps/x86_64/fpu/multiarch/w_log.c
++++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
+@@ -1 +1,6 @@
+-#include <sysdeps/../math/w_log.c>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
++# include <sysdeps/ieee754/dbl-64/w_log.c>
++#else
++# include <sysdeps/../math/w_log.c>
++#endif
+diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
+index b50c1988de..d5fcc4f871 100644
+--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
++++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
+@@ -1 +1,6 @@
+-#include <sysdeps/../math/w_pow.c>
++#include <sysdeps/x86/isa-level.h>
++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
++# include <sysdeps/ieee754/dbl-64/w_pow.c>
++#else
++# include <sysdeps/../math/w_pow.c>
++#endif
+
+commit 31da30f23cddd36db29d5b6a1c7619361b271fb4
+Author: Charles Fol <folcharles@gmail.com>
+Date:   Thu Mar 28 12:25:38 2024 -0300
+
+    iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961)
+    
+    ISO-2022-CN-EXT uses escape sequences to indicate character set changes
+    (as specified by RFC 1922).  While the SOdesignation has the expected
+    bounds checks, neither SS2designation nor SS3designation has them,
+    allowing a write overflow of 1, 2, or 3 bytes with fixed values:
+    '$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'.
+    
+    Checked on aarch64-linux-gnu.
+    
+    Co-authored-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
+    Tested-by: Carlos O'Donell <carlos@redhat.com>
+    
+    (cherry picked from commit f9dc609e06b1136bb0408be9605ce7973a767ada)
+
+diff --git a/iconvdata/Makefile b/iconvdata/Makefile
+index ea019ce5c0..7196a8744b 100644
+--- a/iconvdata/Makefile
++++ b/iconvdata/Makefile
+@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared))
+ tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
+ 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
+ 	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
+-	bug-iconv13 bug-iconv14 bug-iconv15
++	bug-iconv13 bug-iconv14 bug-iconv15 \
++	tst-iconv-iso-2022-cn-ext
+ ifeq ($(have-thread-library),yes)
+ tests += bug-iconv3
+ endif
+@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
+ 			  $(addprefix $(objpfx),$(modules.so))
+ $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
+ 			  $(addprefix $(objpfx),$(modules.so))
++$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \
++					$(addprefix $(objpfx),$(modules.so))
+ 
+ $(objpfx)iconv-test.out: run-iconv-test.sh \
+ 			 $(addprefix $(objpfx), $(gconv-modules)) \
+diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
+index b34c8a36f4..cce29b1969 100644
+--- a/iconvdata/iso-2022-cn-ext.c
++++ b/iconvdata/iso-2022-cn-ext.c
+@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
+ 	      {								      \
+ 		const char *escseq;					      \
+ 									      \
++		if (outptr + 4 > outend)				      \
++		  {							      \
++		    result = __GCONV_FULL_OUTPUT;			      \
++		    break;						      \
++		  }							      \
++									      \
+ 		assert (used == CNS11643_2_set); /* XXX */		      \
+ 		escseq = "*H";						      \
+ 		*outptr++ = ESC;					      \
+@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
+ 	      {								      \
+ 		const char *escseq;					      \
+ 									      \
++		if (outptr + 4 > outend)				      \
++		  {							      \
++		    result = __GCONV_FULL_OUTPUT;			      \
++		    break;						      \
++		  }							      \
++									      \
+ 		assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \
+ 		escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \
+ 		*outptr++ = ESC;					      \
+diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c
+new file mode 100644
+index 0000000000..96a8765fd5
+--- /dev/null
++++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c
+@@ -0,0 +1,128 @@
++/* Verify ISO-2022-CN-EXT does not write out of the bounds.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <string.h>
++
++#include <errno.h>
++#include <iconv.h>
++#include <sys/mman.h>
++
++#include <support/xunistd.h>
++#include <support/check.h>
++#include <support/support.h>
++
++/* The test sets up a two memory page buffer with the second page marked
++   PROT_NONE to trigger a fault if the conversion writes beyond the exact
++   expected amount.  Then we carry out various conversions and precisely
++   place the start of the output buffer in order to trigger a SIGSEGV if the
++   process writes anywhere between 1 and page sized bytes more (only one
++   PROT_NONE page is setup as a canary) than expected.  These tests exercise
++   all three of the cases in ISO-2022-CN-EXT where the converter must switch
++   character sets and may run out of buffer space while doing the
++   operation.  */
++
++static int
++do_test (void)
++{
++  iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8");
++  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
++
++  char *ntf;
++  size_t ntfsize;
++  char *outbufbase;
++  {
++    int pgz = getpagesize ();
++    TEST_VERIFY_EXIT (pgz > 0);
++    ntfsize = 2 * pgz;
++
++    ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE
++		 | MAP_ANONYMOUS, -1);
++    xmprotect (ntf + pgz, pgz, PROT_NONE);
++
++    outbufbase = ntf + pgz;
++  }
++
++  /* Check if SOdesignation escape sequence does not trigger an OOB write.  */
++  {
++    char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2";
++
++    for (int i = 0; i < 9; i++)
++      {
++	char *inp = inbuf;
++	size_t inleft = sizeof (inbuf) - 1;
++
++	char *outp = outbufbase - i;
++	size_t outleft = i;
++
++	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
++			  == (size_t) -1);
++	TEST_COMPARE (errno, E2BIG);
++
++	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
++      }
++  }
++
++  /* Same as before for SS2designation.  */
++  {
++    char inbuf[] = "㴽 \xe3\xb4\xbd";
++
++    for (int i = 0; i < 14; i++)
++      {
++	char *inp = inbuf;
++	size_t inleft = sizeof (inbuf) - 1;
++
++	char *outp = outbufbase - i;
++	size_t outleft = i;
++
++	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
++			  == (size_t) -1);
++	TEST_COMPARE (errno, E2BIG);
++
++	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
++      }
++  }
++
++  /* Same as before for SS3designation.  */
++  {
++    char inbuf[] = "劄 \xe5\x8a\x84";
++
++    for (int i = 0; i < 14; i++)
++      {
++	char *inp = inbuf;
++	size_t inleft = sizeof (inbuf) - 1;
++
++	char *outp = outbufbase - i;
++	size_t outleft = i;
++
++	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
++			  == (size_t) -1);
++	TEST_COMPARE (errno, E2BIG);
++
++	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
++      }
++  }
++
++  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
++
++  xmunmap (ntf, ntfsize);
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+
+commit e828914cf9f2fc2caa5bced0fc6a03cb78324979
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Tue Apr 23 21:16:32 2024 +0200
+
+    nptl: Fix tst-cancel30 on kernels without ppoll_time64 support
+    
+    Fall back to ppoll if ppoll_time64 fails with ENOSYS.
+    Fixes commit 370da8a121c3ba9eeb2f13da15fc0f21f4136b25 ("nptl: Fix
+    tst-cancel30 on sparc64").
+    
+    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+    (cherry picked from commit f4724843ada64a51d66f65d3199fe431f9d4c254)
+
+diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c
+index 3030660e5f..94ad6281bc 100644
+--- a/sysdeps/pthread/tst-cancel30.c
++++ b/sysdeps/pthread/tst-cancel30.c
+@@ -18,6 +18,7 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
++#include <errno.h>
+ #include <support/check.h>
+ #include <support/xstdio.h>
+ #include <support/xthread.h>
+@@ -46,13 +47,19 @@ tf (void *arg)
+ 
+   /* Wait indefinitely for cancellation, which only works if asynchronous
+      cancellation is enabled.  */
+-#if defined SYS_ppoll || defined SYS_ppoll_time64
+-# ifndef SYS_ppoll_time64
+-#  define SYS_ppoll_time64 SYS_ppoll
++#ifdef SYS_ppoll_time64
++  long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
++  (void) ret;
++# ifdef SYS_ppoll
++  if (ret == -1 && errno == ENOSYS)
++    syscall (SYS_ppoll, NULL, 0, NULL, NULL);
+ # endif
+-  syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
+ #else
++# ifdef SYS_ppoll
++  syscall (SYS_ppoll, NULL, 0, NULL, NULL);
++# else
+   for (;;);
++# endif
+ #endif
+ 
+   return 0;
+
+commit e701c7d761f6e5c48d8e9dd5da88cbe2e94943f4
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu Apr 25 12:56:48 2024 +0200
+
+    i386: ulp update for SSE2 --disable-multi-arch configurations
+    
+    (cherry picked from commit 3a3a4497421422aa854c855cbe5110ca7d598ffc)
+
+diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
+index 84e6686eba..f2139fc172 100644
+--- a/sysdeps/i386/fpu/libm-test-ulps
++++ b/sysdeps/i386/fpu/libm-test-ulps
+@@ -1232,6 +1232,7 @@ ldouble: 6
+ 
+ Function: "hypot":
+ double: 1
++float: 1
+ float128: 1
+ ldouble: 1
+ 
+
+commit 2f8f157eb0cc7f1d8d9a3fcaa8c55bed53b092a8
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Tue Apr 23 13:59:50 2024 -0700
+
+    x86: Define MINIMUM_X86_ISA_LEVEL in config.h [BZ #31676]
+    
+    Define MINIMUM_X86_ISA_LEVEL at configure time to avoid
+    
+    /usr/bin/ld: …/build/elf/librtld.os: in function `init_cpu_features':
+    …/git/elf/../sysdeps/x86/cpu-features.c:1202: undefined reference to `_dl_runtime_resolve_fxsave'
+    /usr/bin/ld: …/build/elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
+    /usr/bin/ld: final link failed: bad value
+    collect2: error: ld returned 1 exit status
+    
+    when glibc is built with -march=x86-64-v3 and configured with
+    --with-rtld-early-cflags=-march=x86-64, which is used to allow ld.so to
+    print an error message on unsupported CPUs:
+    
+    Fatal glibc error: CPU does not support x86-64-v3
+    
+    This fixes BZ #31676.
+    Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
+    
+    (cherry picked from commit 46c999741340ea559784c20a45077955b50aca43)
+
+diff --git a/config.h.in b/config.h.in
+index 4d33c63a84..1e647de585 100644
+--- a/config.h.in
++++ b/config.h.in
+@@ -286,6 +286,9 @@
+ /* Define if x86 ISA level should be included in shared libraries.  */
+ #undef INCLUDE_X86_ISA_LEVEL
+ 
++/* The x86 ISA level.  1 for baseline.  Undefined on non-x86.  */
++#undef MINIMUM_X86_ISA_LEVEL
++
+ /* Define if -msahf is enabled by default on x86.  */
+ #undef HAVE_X86_LAHF_SAHF
+ 
+diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
+index 2a5421bb31..d28d9bcb29 100644
+--- a/sysdeps/x86/configure
++++ b/sysdeps/x86/configure
+@@ -151,6 +151,13 @@ printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; }
+ else
+   libc_cv_have_x86_isa_level=baseline
+ fi
++if test $libc_cv_have_x86_isa_level = baseline; then
++  printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h
++
++else
++  printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL $libc_cv_have_x86_isa_level" >>confdefs.h
++
++fi
+ config_vars="$config_vars
+ have-x86-isa-level = $libc_cv_have_x86_isa_level"
+ config_vars="$config_vars
+diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
+index 78ff7c8f41..5b0acd03d2 100644
+--- a/sysdeps/x86/configure.ac
++++ b/sysdeps/x86/configure.ac
+@@ -105,6 +105,11 @@ EOF
+ else
+   libc_cv_have_x86_isa_level=baseline
+ fi
++if test $libc_cv_have_x86_isa_level = baseline; then
++  AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1)
++else
++  AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
++fi
+ LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
+ LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
+ LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
+diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
+index 11fe1ca90c..2c7f74212b 100644
+--- a/sysdeps/x86/isa-level.h
++++ b/sysdeps/x86/isa-level.h
+@@ -61,8 +61,10 @@
+ # define __X86_ISA_V4 0
+ #endif
+ 
+-#define MINIMUM_X86_ISA_LEVEL                                                 \
++#ifndef MINIMUM_X86_ISA_LEVEL
++# define MINIMUM_X86_ISA_LEVEL                                                 \
+   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
++#endif
+ 
+ /* Depending on the minimum ISA level, a feature check result can be a
+    compile-time constant.. */
+
+commit 1263d583d2e28afb8be53f8d6922f0842036f35d
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu Apr 25 15:00:45 2024 +0200
+
+    CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup cache (bug 31677)
+    
+    Using alloca matches what other caches do.  The request length is
+    bounded by MAXKEYLEN.
+    
+    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
+    (cherry picked from commit 87801a8fd06db1d654eea3e4f7626ff476a9bdaa)
+
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index 0c6e46f15c..f227dc7fa2 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -502,12 +502,13 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+       = (struct indataset *) mempool_alloc (db,
+ 					    sizeof (*dataset) + req->key_len,
+ 					    1);
+-  struct indataset dataset_mem;
+   bool cacheable = true;
+   if (__glibc_unlikely (dataset == NULL))
+     {
+       cacheable = false;
+-      dataset = &dataset_mem;
++      /* The alloca is safe because nscd_run_worker verifies that
++	 key_len is not larger than MAXKEYLEN.  */
++      dataset = alloca (sizeof (*dataset) + req->key_len);
+     }
+ 
+   datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
+
+commit 5a508e0b508c8ad53bd0d2fb48fd71b242626341
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu Apr 25 15:01:07 2024 +0200
+
+    CVE-2024-33600: nscd: Do not send missing not-found response in addgetnetgrentX (bug 31678)
+    
+    If we failed to add a not-found response to the cache, the dataset
+    pointer can be null, resulting in a null pointer dereference.
+    
+    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
+    (cherry picked from commit 7835b00dbce53c3c87bbbb1754a95fb5e58187aa)
+
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index f227dc7fa2..c18fe111f3 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -147,7 +147,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+       /* No such service.  */
+       cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
+ 			       &key_copy);
+-      goto writeout;
++      goto maybe_cache_add;
+     }
+ 
+   memset (&data, '\0', sizeof (data));
+@@ -348,7 +348,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+     {
+       cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
+ 			       &key_copy);
+-      goto writeout;
++      goto maybe_cache_add;
+     }
+ 
+   total = buffilled;
+@@ -410,14 +410,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+   }
+ 
+   if (he == NULL && fd != -1)
+-    {
+-      /* We write the dataset before inserting it to the database
+-	 since while inserting this thread might block and so would
+-	 unnecessarily let the receiver wait.  */
+-    writeout:
++    /* We write the dataset before inserting it to the database since
++       while inserting this thread might block and so would
++       unnecessarily let the receiver wait.  */
+       writeall (fd, &dataset->resp, dataset->head.recsize);
+-    }
+ 
++ maybe_cache_add:
+   if (cacheable)
+     {
+       /* If necessary, we also propagate the data to disk.  */
+
+commit c99f886de54446cd4447db6b44be93dabbdc2f8b
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu Apr 25 15:01:07 2024 +0200
+
+    CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678)
+    
+    The addgetnetgrentX call in addinnetgrX may have failed to produce
+    a result, so the result variable in addinnetgrX can be NULL.
+    Use db->negtimeout as the fallback value if there is no result data;
+    the timeout is also overwritten below.
+    
+    Also avoid sending a second not-found response.  (The client
+    disconnects after receiving the first response, so the data stream did
+    not go out of sync even without this fix.)  It is still beneficial to
+    add the negative response to the mapping, so that the client can get
+    it from there in the future, instead of going through the socket.
+    
+    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
+    (cherry picked from commit b048a482f088e53144d26a61c390bed0210f49f2)
+
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index c18fe111f3..e22ffa5884 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -511,14 +511,15 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+ 
+   datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
+ 		     sizeof (innetgroup_response_header),
+-		     he == NULL ? 0 : dh->nreloads + 1, result->head.ttl);
++		     he == NULL ? 0 : dh->nreloads + 1,
++		     result == NULL ? db->negtimeout : result->head.ttl);
+   /* Set the notfound status and timeout based on the result from
+      getnetgrent.  */
+-  dataset->head.notfound = result->head.notfound;
++  dataset->head.notfound = result == NULL || result->head.notfound;
+   dataset->head.timeout = timeout;
+ 
+   dataset->resp.version = NSCD_VERSION;
+-  dataset->resp.found = result->resp.found;
++  dataset->resp.found = result != NULL && result->resp.found;
+   /* Until we find a matching entry the result is 0.  */
+   dataset->resp.result = 0;
+ 
+@@ -566,7 +567,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+       goto out;
+     }
+ 
+-  if (he == NULL)
++  /* addgetnetgrentX may have already sent a notfound response.  Do
++     not send another one.  */
++  if (he == NULL && dataset->resp.found)
+     {
+       /* We write the dataset before inserting it to the database
+ 	 since while inserting this thread might block and so would
+
+commit a9a8d3eebb145779a18d90e3966009a1daa63cd8
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu Apr 25 15:01:07 2024 +0200
+
+    CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680)
+    
+    This avoids potential memory corruption when the underlying NSS
+    callback function does not use the buffer space to store all strings
+    (e.g., for constant strings).
+    
+    Instead of custom buffer management, two scratch buffers are used.
+    This increases stack usage somewhat.
+    
+    Scratch buffer allocation failure is handled by returning -1
+    (an invalid timeout value) instead of terminating the process.
+    This fixes bug 31679.
+    
+    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
+    (cherry picked from commit c04a21e050d64a1193a6daab872bca2528bda44b)
+
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index e22ffa5884..e8fe041846 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -23,6 +23,7 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/mman.h>
++#include <scratch_buffer.h>
+ 
+ #include "../nss/netgroup.h"
+ #include "nscd.h"
+@@ -65,6 +66,16 @@ struct dataset
+   char strdata[0];
+ };
+ 
++/* Send a notfound response to FD.  Always returns -1 to indicate an
++   ephemeral error.  */
++static time_t
++send_notfound (int fd)
++{
++  if (fd != -1)
++    TEMP_FAILURE_RETRY (send (fd, &notfound, sizeof (notfound), MSG_NOSIGNAL));
++  return -1;
++}
++
+ /* Sends a notfound message and prepares a notfound dataset to write to the
+    cache.  Returns true if there was enough memory to allocate the dataset and
+    returns the dataset in DATASETP, total bytes to write in TOTALP and the
+@@ -83,8 +94,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
+   total = sizeof (notfound);
+   timeout = time (NULL) + db->negtimeout;
+ 
+-  if (fd != -1)
+-    TEMP_FAILURE_RETRY (send (fd, &notfound, total, MSG_NOSIGNAL));
++  send_notfound (fd);
+ 
+   dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1);
+   /* If we cannot permanently store the result, so be it.  */
+@@ -109,11 +119,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
+   return cacheable;
+ }
+ 
++struct addgetnetgrentX_scratch
++{
++  /* This is the result that the caller should use.  It can be NULL,
++     point into buffer, or it can be in the cache.  */
++  struct dataset *dataset;
++
++  struct scratch_buffer buffer;
++
++  /* Used internally in addgetnetgrentX as a staging area.  */
++  struct scratch_buffer tmp;
++
++  /* Number of bytes in buffer that are actually used.  */
++  size_t buffer_used;
++};
++
++static void
++addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch)
++{
++  scratch->dataset = NULL;
++  scratch_buffer_init (&scratch->buffer);
++  scratch_buffer_init (&scratch->tmp);
++
++  /* Reserve space for the header.  */
++  scratch->buffer_used = sizeof (struct dataset);
++  static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space),
++		 "initial buffer space");
++  memset (scratch->tmp.data, 0, sizeof (struct dataset));
++}
++
++static void
++addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch)
++{
++  scratch_buffer_free (&scratch->buffer);
++  scratch_buffer_free (&scratch->tmp);
++}
++
++/* Copy LENGTH bytes from S into SCRATCH.  Returns NULL if SCRATCH
++   could not be resized, otherwise a pointer to the copy.  */
++static char *
++addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch,
++			  const char *s, size_t length)
++{
++  while (true)
++    {
++      size_t remaining = scratch->buffer.length - scratch->buffer_used;
++      if (remaining >= length)
++	break;
++      if (!scratch_buffer_grow_preserve (&scratch->buffer))
++	return NULL;
++    }
++  char *copy = scratch->buffer.data + scratch->buffer_used;
++  memcpy (copy, s, length);
++  scratch->buffer_used += length;
++  return copy;
++}
++
++/* Copy S into SCRATCH, including its null terminator.  Returns false
++   if SCRATCH could not be resized.  */
++static bool
++addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s)
++{
++  if (s == NULL)
++    s = "";
++  return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL;
++}
++
++/* Caller must initialize and free *SCRATCH.  If the return value is
++   negative, this function has sent a notfound response.  */
+ static time_t
+ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ 		 const char *key, uid_t uid, struct hashentry *he,
+-		 struct datahead *dh, struct dataset **resultp,
+-		 void **tofreep)
++		 struct datahead *dh, struct addgetnetgrentX_scratch *scratch)
+ {
+   if (__glibc_unlikely (debug_level > 0))
+     {
+@@ -132,14 +209,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ 
+   char *key_copy = NULL;
+   struct __netgrent data;
+-  size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len);
+-  size_t buffilled = sizeof (*dataset);
+-  char *buffer = NULL;
+   size_t nentries = 0;
+   size_t group_len = strlen (key) + 1;
+   struct name_list *first_needed
+     = alloca (sizeof (struct name_list) + group_len);
+-  *tofreep = NULL;
+ 
+   if (netgroup_database == NULL
+       && !__nss_database_get (nss_database_netgroup, &netgroup_database))
+@@ -151,8 +224,6 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+     }
+ 
+   memset (&data, '\0', sizeof (data));
+-  buffer = xmalloc (buflen);
+-  *tofreep = buffer;
+   first_needed->next = first_needed;
+   memcpy (first_needed->name, key, group_len);
+   data.needed_groups = first_needed;
+@@ -195,8 +266,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ 		while (1)
+ 		  {
+ 		    int e;
+-		    status = getfct.f (&data, buffer + buffilled,
+-				       buflen - buffilled - req->key_len, &e);
++		    status = getfct.f (&data, scratch->tmp.data,
++				       scratch->tmp.length, &e);
+ 		    if (status == NSS_STATUS_SUCCESS)
+ 		      {
+ 			if (data.type == triple_val)
+@@ -204,68 +275,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ 			    const char *nhost = data.val.triple.host;
+ 			    const char *nuser = data.val.triple.user;
+ 			    const char *ndomain = data.val.triple.domain;
+-
+-			    size_t hostlen = strlen (nhost ?: "") + 1;
+-			    size_t userlen = strlen (nuser ?: "") + 1;
+-			    size_t domainlen = strlen (ndomain ?: "") + 1;
+-
+-			    if (nhost == NULL || nuser == NULL || ndomain == NULL
+-				|| nhost > nuser || nuser > ndomain)
+-			      {
+-				const char *last = nhost;
+-				if (last == NULL
+-				    || (nuser != NULL && nuser > last))
+-				  last = nuser;
+-				if (last == NULL
+-				    || (ndomain != NULL && ndomain > last))
+-				  last = ndomain;
+-
+-				size_t bufused
+-				  = (last == NULL
+-				     ? buffilled
+-				     : last + strlen (last) + 1 - buffer);
+-
+-				/* We have to make temporary copies.  */
+-				size_t needed = hostlen + userlen + domainlen;
+-
+-				if (buflen - req->key_len - bufused < needed)
+-				  {
+-				    buflen += MAX (buflen, 2 * needed);
+-				    /* Save offset in the old buffer.  We don't
+-				       bother with the NULL check here since
+-				       we'll do that later anyway.  */
+-				    size_t nhostdiff = nhost - buffer;
+-				    size_t nuserdiff = nuser - buffer;
+-				    size_t ndomaindiff = ndomain - buffer;
+-
+-				    char *newbuf = xrealloc (buffer, buflen);
+-				    /* Fix up the triplet pointers into the new
+-				       buffer.  */
+-				    nhost = (nhost ? newbuf + nhostdiff
+-					     : NULL);
+-				    nuser = (nuser ? newbuf + nuserdiff
+-					     : NULL);
+-				    ndomain = (ndomain ? newbuf + ndomaindiff
+-					       : NULL);
+-				    *tofreep = buffer = newbuf;
+-				  }
+-
+-				nhost = memcpy (buffer + bufused,
+-						nhost ?: "", hostlen);
+-				nuser = memcpy ((char *) nhost + hostlen,
+-						nuser ?: "", userlen);
+-				ndomain = memcpy ((char *) nuser + userlen,
+-						  ndomain ?: "", domainlen);
+-			      }
+-
+-			    char *wp = buffer + buffilled;
+-			    wp = memmove (wp, nhost ?: "", hostlen);
+-			    wp += hostlen;
+-			    wp = memmove (wp, nuser ?: "", userlen);
+-			    wp += userlen;
+-			    wp = memmove (wp, ndomain ?: "", domainlen);
+-			    wp += domainlen;
+-			    buffilled = wp - buffer;
++			    if (!(addgetnetgrentX_append (scratch, nhost)
++				  && addgetnetgrentX_append (scratch, nuser)
++				  && addgetnetgrentX_append (scratch, ndomain)))
++			      return send_notfound (fd);
+ 			    ++nentries;
+ 			  }
+ 			else
+@@ -317,8 +330,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ 		      }
+ 		    else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
+ 		      {
+-			buflen *= 2;
+-			*tofreep = buffer = xrealloc (buffer, buflen);
++			if (!scratch_buffer_grow (&scratch->tmp))
++			  return send_notfound (fd);
+ 		      }
+ 		    else if (status == NSS_STATUS_RETURN
+ 			     || status == NSS_STATUS_NOTFOUND
+@@ -351,10 +364,17 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+       goto maybe_cache_add;
+     }
+ 
+-  total = buffilled;
++  /* Capture the result size without the key appended.   */
++  total = scratch->buffer_used;
++
++  /* Make a copy of the key.  The scratch buffer must not move after
++     this point.  */
++  key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len);
++  if (key_copy == NULL)
++    return send_notfound (fd);
+ 
+   /* Fill in the dataset.  */
+-  dataset = (struct dataset *) buffer;
++  dataset = scratch->buffer.data;
+   timeout = datahead_init_pos (&dataset->head, total + req->key_len,
+ 			       total - offsetof (struct dataset, resp),
+ 			       he == NULL ? 0 : dh->nreloads + 1,
+@@ -363,11 +383,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+   dataset->resp.version = NSCD_VERSION;
+   dataset->resp.found = 1;
+   dataset->resp.nresults = nentries;
+-  dataset->resp.result_len = buffilled - sizeof (*dataset);
+-
+-  assert (buflen - buffilled >= req->key_len);
+-  key_copy = memcpy (buffer + buffilled, key, req->key_len);
+-  buffilled += req->key_len;
++  dataset->resp.result_len = total - sizeof (*dataset);
+ 
+   /* Now we can determine whether on refill we have to create a new
+      record or not.  */
+@@ -398,7 +414,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+     if (__glibc_likely (newp != NULL))
+       {
+ 	/* Adjust pointer into the memory block.  */
+-	key_copy = (char *) newp + (key_copy - buffer);
++	key_copy = (char *) newp + (key_copy - (char *) dataset);
+ 
+ 	dataset = memcpy (newp, dataset, total + req->key_len);
+ 	cacheable = true;
+@@ -439,7 +455,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+     }
+ 
+  out:
+-  *resultp = dataset;
++  scratch->dataset = dataset;
+ 
+   return timeout;
+ }
+@@ -460,6 +476,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+   if (user != NULL)
+     key = strchr (key, '\0') + 1;
+   const char *domain = *key++ ? key : NULL;
++  struct addgetnetgrentX_scratch scratch;
++
++  addgetnetgrentX_scratch_init (&scratch);
+ 
+   if (__glibc_unlikely (debug_level > 0))
+     {
+@@ -475,12 +494,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+ 							    group, group_len,
+ 							    db, uid);
+   time_t timeout;
+-  void *tofree;
+   if (result != NULL)
+-    {
+-      timeout = result->head.timeout;
+-      tofree = NULL;
+-    }
++    timeout = result->head.timeout;
+   else
+     {
+       request_header req_get =
+@@ -489,7 +504,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+ 	  .key_len = group_len
+ 	};
+       timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL,
+-				 &result, &tofree);
++				 &scratch);
++      result = scratch.dataset;
++      if (timeout < 0)
++	goto out;
+     }
+ 
+   struct indataset
+@@ -603,7 +621,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
+     }
+ 
+  out:
+-  free (tofree);
++  addgetnetgrentX_scratch_free (&scratch);
+   return timeout;
+ }
+ 
+@@ -613,11 +631,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req,
+ 			const char *key, uid_t uid, struct hashentry *he,
+ 			struct datahead *dh)
+ {
+-  struct dataset *ignore;
+-  void *tofree;
+-  time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh,
+-				    &ignore, &tofree);
+-  free (tofree);
++  struct addgetnetgrentX_scratch scratch;
++  addgetnetgrentX_scratch_init (&scratch);
++  time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch);
++  addgetnetgrentX_scratch_free (&scratch);
++  if (timeout < 0)
++    timeout = 0;
+   return timeout;
+ }
+ 
+@@ -661,5 +680,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
+       .key_len = he->len
+     };
+ 
+-  return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh);
++  int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
++			     he, dh);
++  if (timeout < 0)
++    timeout = 0;
++  return timeout;
+ }
+
+commit fd658f026f25cf59e8db243bc3b3e09cd5a20ba0
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Thu Apr 25 08:06:52 2024 -0700
+
+    elf: Also compile dl-misc.os with $(rtld-early-cflags)
+    
+    Also compile dl-misc.os with $(rtld-early-cflags) to avoid
+    
+    Program received signal SIGILL, Illegal instruction.
+    0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2",
+        endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156
+    156       bool positive = true;
+    (gdb) bt
+     #0  0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2",
+        endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156
+     #1  0x00007ffff7fdb1a9 in tunable_initialize (
+        cur=cur@entry=0x7ffff7ffbc00 <tunable_list+2176>,
+        strval=strval@entry=0x7fffffffe2c9 "2", len=len@entry=1)
+        at dl-tunables.c:131
+     #2  0x00007ffff7fdb3a2 in parse_tunables (valstring=<optimized out>)
+        at dl-tunables.c:258
+     #3  0x00007ffff7fdb5d9 in __GI___tunables_init (envp=0x7fffffffdd58)
+        at dl-tunables.c:288
+     #4  0x00007ffff7fe44c3 in _dl_sysdep_start (
+        start_argptr=start_argptr@entry=0x7fffffffdcb0,
+        dl_main=dl_main@entry=0x7ffff7fe5f80 <dl_main>)
+        at ../sysdeps/unix/sysv/linux/dl-sysdep.c:110
+     #5  0x00007ffff7fe5cae in _dl_start_final (arg=0x7fffffffdcb0) at rtld.c:494
+     #6  _dl_start (arg=0x7fffffffdcb0) at rtld.c:581
+     #7  0x00007ffff7fe4b38 in _start ()
+    (gdb)
+    
+    when setting GLIBC_TUNABLES in glibc compiled with APX.
+    Reviewed-by: Florian Weimer <fweimer@redhat.com>
+    
+    (cherry picked from commit 049b7684c912dd32b67b1b15b0f43bf07d5f512e)
+
+diff --git a/elf/Makefile b/elf/Makefile
+index 69aa423c4b..a50a988e73 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -170,6 +170,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os))
+ CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines))
+ 
+ # Add the requested compiler flags to the early startup code.
++CFLAGS-dl-misc.os += $(rtld-early-cflags)
+ CFLAGS-dl-printf.os += $(rtld-early-cflags)
+ CFLAGS-dl-setup_hash.os += $(rtld-early-cflags)
+ CFLAGS-dl-sysdep.os += $(rtld-early-cflags)
+
+commit 9831f98c266a8d56d1bf729b709c08e40375540c
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Fri Apr 19 14:38:17 2024 +0200
+
+    login: Check default sizes of structs utmp, utmpx, lastlog
+    
+    The default <utmp-size.h> is for ports with a 64-bit time_t.
+    Ports with a 32-bit time_t or with __WORDSIZE_TIME64_COMPAT32=1
+    need to override it.
+    
+    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+    (cherry picked from commit 4d4da5aab936504b2d3eca3146e109630d9093c4)
+
+diff --git a/login/Makefile b/login/Makefile
+index 1e22008a61..b26ac42bfc 100644
+--- a/login/Makefile
++++ b/login/Makefile
+@@ -44,7 +44,7 @@ subdir-dirs = programs
+ vpath %.c programs
+ 
+ tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \
+-  tst-pututxline-lockfail tst-pututxline-cache
++  tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size
+ 
+ # Empty compatibility library for old binaries.
+ extra-libs      := libutil
+diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c
+new file mode 100644
+index 0000000000..1b7f7ff042
+--- /dev/null
++++ b/login/tst-utmp-size.c
+@@ -0,0 +1,33 @@
++/* Check expected sizes of struct utmp, struct utmpx, struct lastlog.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <utmp.h>
++#include <utmpx.h>
++#include <utmp-size.h>
++
++static int
++do_test (void)
++{
++  _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size");
++  _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size");
++  _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE,
++                  "struct lastlog size");
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h
+new file mode 100644
+index 0000000000..a247fcd3da
+--- /dev/null
++++ b/sysdeps/arc/utmp-size.h
+@@ -0,0 +1,3 @@
++/* arc has less padding than other architectures with 64-bit time_t.  */
++#define UTMP_SIZE 392
++#define LASTLOG_SIZE 296
+diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/arm/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/csky/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h
+new file mode 100644
+index 0000000000..89dbe878b0
+--- /dev/null
++++ b/sysdeps/generic/utmp-size.h
+@@ -0,0 +1,23 @@
++/* Expected sizes of utmp-related structures stored in files.  64-bit version.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++/* Expected size, in bytes, of struct utmp and struct utmpx.  */
++#define UTMP_SIZE 400
++
++/* Expected size, in bytes, of struct lastlog.  */
++#define LASTLOG_SIZE 296
+diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/hppa/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h
+new file mode 100644
+index 0000000000..5946685819
+--- /dev/null
++++ b/sysdeps/m68k/utmp-size.h
+@@ -0,0 +1,3 @@
++/* m68k has 2-byte alignment.  */
++#define UTMP_SIZE 382
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/microblaze/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/mips/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/nios2/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h
+new file mode 100644
+index 0000000000..6b3653aa4d
+--- /dev/null
++++ b/sysdeps/or1k/utmp-size.h
+@@ -0,0 +1,3 @@
++/* or1k has less padding than other architectures with 64-bit time_t.  */
++#define UTMP_SIZE 392
++#define LASTLOG_SIZE 296
+diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/powerpc/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/riscv/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/sh/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/sparc/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h
+new file mode 100644
+index 0000000000..8f21ebe1b6
+--- /dev/null
++++ b/sysdeps/x86/utmp-size.h
+@@ -0,0 +1,2 @@
++#define UTMP_SIZE 384
++#define LASTLOG_SIZE 292
+
+commit 836d43b98973e0845b739ff5d3aad3af09dc7d0f
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Fri Apr 19 14:38:17 2024 +0200
+
+    login: structs utmp, utmpx, lastlog _TIME_BITS independence (bug 30701)
+    
+    These structs describe file formats under /var/log, and should not
+    depend on the definition of _TIME_BITS.  This is achieved by
+    defining __WORDSIZE_TIME64_COMPAT32 to 1 on 32-bit ports that
+    support 32-bit time_t values (where __time_t is 32 bits).
+    
+    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+    (cherry picked from commit 9abdae94c7454c45e02e97e4ed1eb1b1915d13d8)
+
+diff --git a/bits/wordsize.h b/bits/wordsize.h
+index 14edae3a11..53013a9275 100644
+--- a/bits/wordsize.h
++++ b/bits/wordsize.h
+@@ -21,7 +21,9 @@
+ #define __WORDSIZE32_PTRDIFF_LONG
+ 
+ /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in
+-   struct lastlog and struct utmp{,x} on 64-bit ports.  This may be done in
++   struct lastlog and struct utmp{,x}.  This may be done in
+    order to make 64-bit ports compatible with 32-bit ports.  Set to 0 for
+-   64-bit ports where the time types are 64-bits or for any 32-bit ports.  */
++   64-bit ports where the time types are 64-bits and new 32-bit ports
++   where time_t is 64 bits, and there is no companion architecture with
++   32-bit time_t.  */
+ #define __WORDSIZE_TIME64_COMPAT32
+diff --git a/login/Makefile b/login/Makefile
+index b26ac42bfc..f91190e3dc 100644
+--- a/login/Makefile
++++ b/login/Makefile
+@@ -44,7 +44,9 @@ subdir-dirs = programs
+ vpath %.c programs
+ 
+ tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \
+-  tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size
++  tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64
++
++CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64
+ 
+ # Empty compatibility library for old binaries.
+ extra-libs      := libutil
+diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c
+new file mode 100644
+index 0000000000..7a581a4c12
+--- /dev/null
++++ b/login/tst-utmp-size-64.c
+@@ -0,0 +1,2 @@
++/* The on-disk layout must not change in time64 mode.  */
++#include "tst-utmp-size.c"
+diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/arm/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/csky/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/m68k/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/microblaze/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h
+index 57f0f2a22f..30dd3fd85d 100644
+--- a/sysdeps/mips/bits/wordsize.h
++++ b/sysdeps/mips/bits/wordsize.h
+@@ -19,11 +19,7 @@
+ 
+ #define __WORDSIZE			_MIPS_SZPTR
+ 
+-#if _MIPS_SIM == _ABI64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+-#else
+-# define __WORDSIZE_TIME64_COMPAT32	0
+-#endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+ 
+ #if __WORDSIZE == 32
+ #define __WORDSIZE32_SIZE_ULONG		0
+diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/nios2/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h
+index 04ca9debf0..6993fb6b29 100644
+--- a/sysdeps/powerpc/powerpc32/bits/wordsize.h
++++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h
+@@ -2,10 +2,9 @@
+ 
+ #if defined __powerpc64__
+ # define __WORDSIZE	64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ #else
+ # define __WORDSIZE	32
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ # define __WORDSIZE32_SIZE_ULONG	0
+ # define __WORDSIZE32_PTRDIFF_LONG	0
+ #endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h
+index 04ca9debf0..6993fb6b29 100644
+--- a/sysdeps/powerpc/powerpc64/bits/wordsize.h
++++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h
+@@ -2,10 +2,9 @@
+ 
+ #if defined __powerpc64__
+ # define __WORDSIZE	64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ #else
+ # define __WORDSIZE	32
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ # define __WORDSIZE32_SIZE_ULONG	0
+ # define __WORDSIZE32_PTRDIFF_LONG	0
+ #endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/sh/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h
+index 4bbd2e63b4..a2e79e0fa9 100644
+--- a/sysdeps/sparc/sparc32/bits/wordsize.h
++++ b/sysdeps/sparc/sparc32/bits/wordsize.h
+@@ -1,6 +1,6 @@
+ /* Determine the wordsize from the preprocessor defines.  */
+ 
+ #define __WORDSIZE	32
+-#define __WORDSIZE_TIME64_COMPAT32	0
++#define __WORDSIZE_TIME64_COMPAT32	1
+ #define __WORDSIZE32_SIZE_ULONG	0
+ #define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h
+index 2f66f10d72..ea103e5970 100644
+--- a/sysdeps/sparc/sparc64/bits/wordsize.h
++++ b/sysdeps/sparc/sparc64/bits/wordsize.h
+@@ -2,10 +2,9 @@
+ 
+ #if defined __arch64__ || defined __sparcv9
+ # define __WORDSIZE	64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ #else
+ # define __WORDSIZE	32
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ # define __WORDSIZE32_SIZE_ULONG	0
+ # define __WORDSIZE32_PTRDIFF_LONG	0
+ #endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
+new file mode 100644
+index 0000000000..6ecbfe7c86
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
+@@ -0,0 +1,21 @@
++/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define __WORDSIZE			32
++#define __WORDSIZE_TIME64_COMPAT32	1
++#define __WORDSIZE32_SIZE_ULONG		0
++#define __WORDSIZE32_PTRDIFF_LONG	0
+diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
+index 04ca9debf0..6993fb6b29 100644
+--- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
++++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
+@@ -2,10 +2,9 @@
+ 
+ #if defined __powerpc64__
+ # define __WORDSIZE	64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ #else
+ # define __WORDSIZE	32
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ # define __WORDSIZE32_SIZE_ULONG	0
+ # define __WORDSIZE32_PTRDIFF_LONG	0
+ #endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
+index 7562875ee2..ea103e5970 100644
+--- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
++++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
+@@ -2,10 +2,9 @@
+ 
+ #if defined __arch64__ || defined __sparcv9
+ # define __WORDSIZE	64
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ #else
+ # define __WORDSIZE	32
+ # define __WORDSIZE32_SIZE_ULONG	0
+ # define __WORDSIZE32_PTRDIFF_LONG	0
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ #endif
++#define __WORDSIZE_TIME64_COMPAT32	1
+diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h
+index 70f652bca1..3f40aa76f9 100644
+--- a/sysdeps/x86/bits/wordsize.h
++++ b/sysdeps/x86/bits/wordsize.h
+@@ -8,10 +8,9 @@
+ #define __WORDSIZE32_PTRDIFF_LONG	0
+ #endif
+ 
++#define __WORDSIZE_TIME64_COMPAT32 1
++
+ #ifdef __x86_64__
+-# define __WORDSIZE_TIME64_COMPAT32	1
+ /* Both x86-64 and x32 use the 64-bit system call interface.  */
+ # define __SYSCALL_WORDSIZE		64
+-#else
+-# define __WORDSIZE_TIME64_COMPAT32	0
+ #endif
+
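
As context for the commit above: the on-disk utmp/utmpx/lastlog formats must not change when a port switches to 64-bit time_t via _TIME_BITS, which is why __WORDSIZE_TIME64_COMPAT32 is forced to 1 on the affected 32-bit ports. A minimal sketch of the general technique, assuming a hypothetical on-disk record rather than glibc's real struct utmp:

#include <stdint.h>

/* Illustration only: a hypothetical on-disk record, not glibc's struct utmp.
   Fields that end up in /var/log files stay 32 bits wide regardless of the
   ABI's time_t, and a compile-time assertion pins the overall layout.  */
struct disk_record
{
  int32_t type;
  int32_t seconds;   /* stored as 32 bits on disk, independent of _TIME_BITS */
  char    line[32];
};

/* If a configuration switch ever widened a field, the build stops here,
   much like login/tst-utmp-size.c does for the real structures.  */
_Static_assert (sizeof (struct disk_record) == 40, "on-disk record size changed");

int
main (void)
{
  return 0;
}
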
+commit acc56074b0a5127631a64640aef1b7c5c103ebd8
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Thu May 2 17:06:19 2024 +0200
+
+    nscd: Use time_t for return type of addgetnetgrentX
+    
+    Using int may give false results for future dates (timeouts after the
+    year 2028).
+    
+    Fixes commit 04a21e050d64a1193a6daab872bca2528bda44b ("CVE-2024-33601,
+    CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX
+    (bug 31680)").
+    
+    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
+    (cherry picked from commit 4bbca1a44691a6e9adcee5c6798a707b626bc331)
+
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index e8fe041846..01d554af9c 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -680,8 +680,8 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
+       .key_len = he->len
+     };
+ 
+-  int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
+-			     he, dh);
++  time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
++				he, dh);
+   if (timeout < 0)
+     timeout = 0;
+   return timeout;
+
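
The point made above — an int return value silently truncates a 64-bit time_t timeout once it passes the 32-bit range in January 2038 — can be reproduced with a short, self-contained sketch (plain C, not nscd code):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Lossy: converting an out-of-range time_t to int is implementation-defined
   and typically wraps to a negative value on two's-complement targets.  */
static int
timeout_as_int (time_t t)
{
  return t;
}

int
main (void)
{
  /* One day past the point where 32-bit signed time values overflow
     (19 January 2038); assumes a 64-bit time_t.  */
  time_t after_2038 = (time_t) INT32_MAX + 86400;

  printf ("as time_t: %lld\n", (long long) after_2038);
  printf ("as int:    %d\n", timeout_as_int (after_2038));
  return 0;
}
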
+commit 273a835fe7c685cc54266bb8b502787bad5e9bae
+Author: Carlos O'Donell <carlos@redhat.com>
+Date:   Tue Apr 23 13:30:37 2024 -0400
+
+    time: Allow later version licensing.
+    
+    The FSF's Licensing and Compliance Lab noted a discrepancy in the
+    licensing of several files in the glibc package.
+    
+    When timespec_get.c was implemented the license did not include
+    the standard ", or (at your option) any later version." text.
+    
+    Change the license in timespec_get.c and all copied files to match
+    the expected license.
+    
+    This change was previously approved in principle by the FSF in
+    RT ticket #1316403. And a similar instance was fixed in
+    commit 46703efa02f6ddebce5ee54c92f7c32598de0de6.
+    
+    (cherry picked from commit 91695ee4598b39d181ab8df579b888a8863c4cab)
+
+diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c
+index c6e5e66289..778d1e3354 100644
+--- a/sysdeps/unix/sysv/linux/timespec_get.c
++++ b/sysdeps/unix/sysv/linux/timespec_get.c
+@@ -5,7 +5,7 @@
+    The GNU C Library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+-   version 2.1 of the License.
++   version 2.1 of the License, or (at your option) any later version.
+ 
+    The GNU C Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c
+index 5acebe2a2c..2eef9e512c 100644
+--- a/sysdeps/unix/sysv/linux/timespec_getres.c
++++ b/sysdeps/unix/sysv/linux/timespec_getres.c
+@@ -5,7 +5,7 @@
+    The GNU C Library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+-   version 2.1 of the License.
++   version 2.1 of the License, or (at your option) any later version.
+ 
+    The GNU C Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+diff --git a/time/timespec_get.c b/time/timespec_get.c
+index b031e42ca2..26a044bca6 100644
+--- a/time/timespec_get.c
++++ b/time/timespec_get.c
+@@ -4,7 +4,7 @@
+    The GNU C Library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+-   version 2.1 of the License.
++   version 2.1 of the License, or (at your option) any later version.
+ 
+    The GNU C Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+diff --git a/time/timespec_getres.c b/time/timespec_getres.c
+index edb397507c..2e18b8bcac 100644
+--- a/time/timespec_getres.c
++++ b/time/timespec_getres.c
+@@ -5,7 +5,7 @@
+    The GNU C Library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+-   version 2.1 of the License.
++   version 2.1 of the License, or (at your option) any later version.
+ 
+    The GNU C Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+
+commit 3148714ab61ad61281bae5a30f530d637034ac3b
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:02 2024 +0000
+
+    i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk
+    
+    /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk':
+    /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here
+    
+    After this change, the static library built for i586, regardless of PIC
+    options, contains implementations of these functions respectively from
+    sysdeps/i386/memcpy_chk.S and sysdeps/i386/mempcpy_chk.S.  This ensures
+    that memcpy and mempcpy won't pull in __chk_fail and the routines it
+    calls.
+    
+    Reported-by: Florian Weimer <fweimer@redhat.com>
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit 789894a2f554d4503ecb2f13b2b4e93e43414f33)
+
+diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
+index 3e26f112d6..79856d498a 100644
+--- a/sysdeps/i386/i586/memcpy.S
++++ b/sysdeps/i386/i586/memcpy.S
+@@ -26,7 +26,7 @@
+ #define LEN	SRC+4
+ 
+         .text
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY (__memcpy_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
+commit ad92c483a4bd34db1cfb3eb625212ea64848244f
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:03 2024 +0000
+
+    i686: Fix multiple definitions of __memmove_chk and __memset_chk
+    
+    Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and
+    mempcpy.S, but omitted memmove.S and memset.S.  As a result, the static
+    library built as PIC, whether with or without multiarch support,
+    contains two definitions for each of the __memmove_chk and __memset_chk
+    symbols.
+    
+    /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk':
+    /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here
+    
+    After this change, regardless of PIC options, the static library, built
+    for i686 with multiarch contains implementations of these functions
+    respectively from debug/memmove_chk.c and debug/memset_chk.c, and
+    without multiarch contains implementations of these functions
+    respectively from sysdeps/i386/memmove_chk.S and
+    sysdeps/i386/memset_chk.S.  This ensures that memmove and memset won't
+    pull in __chk_fail and the routines it calls.
+    
+    Reported-by: Sam James <sam@gentoo.org>
+    Tested-by: Sam James <sam@gentoo.org>
+    Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch")
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9)
+
+diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
+index f230359ad6..effd958120 100644
+--- a/sysdeps/i386/i686/memmove.S
++++ b/sysdeps/i386/i686/memmove.S
+@@ -29,7 +29,7 @@
+ #define SRC	DEST+4
+ #define LEN	SRC+4
+ 
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY_CHK (__memmove_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
+index f02f5a6df7..ab06771ea0 100644
+--- a/sysdeps/i386/i686/memset.S
++++ b/sysdeps/i386/i686/memset.S
+@@ -27,7 +27,7 @@
+ #define LEN	CHR+4
+ 
+         .text
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY_CHK (__memset_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
+commit ff110b2591f0bdeccd121c3726af19c62d6fb184
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:04 2024 +0000
+
+    Add a test to check for duplicate definitions in the static library
+    
+    This change follows two previous fixes addressing multiple definitions
+    of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk
+    and __memset_chk functions on i686.  The test is intended to prevent
+    such issues from occurring in the future.
+    
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314)
+
+diff --git a/Makefile b/Makefile
+index 7052b46df8..2e351c0321 100644
+--- a/Makefile
++++ b/Makefile
+@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
+ 	$(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
+ 	$(evaluate-test)
+ 
++# Link libc.a as a whole to verify that it does not contain multiple
++# definitions of any symbols.
++tests-special += $(objpfx)link-static-libc.out
++$(objpfx)link-static-libc.out:
++	$(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
++	$(evaluate-test)
++
+ # Print test summary for tests in $1 .sum file;
+ # $2 is optional test identifier.
+ # Fail if there are unexpected failures in the test results.
+
+commit fa616ea3730cb42046d19f28d611be0bc390af7c
+Author: Sam James <sam@gentoo.org>
+Date:   Sat May 4 13:28:13 2024 +0100
+
+    Revert "Add a test to check for duplicate definitions in the static library"
+    
+    This reverts commit ff110b2591f0bdeccd121c3726af19c62d6fb184.
+    
+    I had the wrong cherry-pick reference (the commit content is right; it's
+    just referring to a base that isn't upstream), but let's revert and reapply
+    for clarity.
+    
+    Signed-off-by: Sam James <sam@gentoo.org>
+
+diff --git a/Makefile b/Makefile
+index 2e351c0321..7052b46df8 100644
+--- a/Makefile
++++ b/Makefile
+@@ -577,13 +577,6 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
+ 	$(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
+ 	$(evaluate-test)
+ 
+-# Link libc.a as a whole to verify that it does not contain multiple
+-# definitions of any symbols.
+-tests-special += $(objpfx)link-static-libc.out
+-$(objpfx)link-static-libc.out:
+-	$(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
+-	$(evaluate-test)
+-
+ # Print test summary for tests in $1 .sum file;
+ # $2 is optional test identifier.
+ # Fail if there are unexpected failures in the test results.
+
+commit c16871e662cd0f3370173d916864b19e69f1bc9a
+Author: Sam James <sam@gentoo.org>
+Date:   Sat May 4 13:28:51 2024 +0100
+
+    Revert "i686: Fix multiple definitions of __memmove_chk and __memset_chk"
+    
+    This reverts commit ad92c483a4bd34db1cfb3eb625212ea64848244f.
+    
+    I had the wrong cherry-pick reference (the commit content is right; it's
+    just referring to a base that isn't upstream), but let's revert and reapply
+    for clarity.
+    
+    Signed-off-by: Sam James <sam@gentoo.org>
+
+diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
+index effd958120..f230359ad6 100644
+--- a/sysdeps/i386/i686/memmove.S
++++ b/sysdeps/i386/i686/memmove.S
+@@ -29,7 +29,7 @@
+ #define SRC	DEST+4
+ #define LEN	SRC+4
+ 
+-#if defined SHARED && IS_IN (libc)
++#if defined PIC && IS_IN (libc)
+ ENTRY_CHK (__memmove_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
+index ab06771ea0..f02f5a6df7 100644
+--- a/sysdeps/i386/i686/memset.S
++++ b/sysdeps/i386/i686/memset.S
+@@ -27,7 +27,7 @@
+ #define LEN	CHR+4
+ 
+         .text
+-#if defined SHARED && IS_IN (libc)
++#if defined PIC && IS_IN (libc)
+ ENTRY_CHK (__memset_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
+commit 5141d4d83c17406f0eaea3e345ef2b52e10f386e
+Author: Sam James <sam@gentoo.org>
+Date:   Sat May 4 13:28:54 2024 +0100
+
+    Revert "i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk"
+    
+    This reverts commit 3148714ab61ad61281bae5a30f530d637034ac3b.
+    
+    I had the wrong cherry-pick reference (the commit content is right; it's
+    just referring to a base that isn't upstream), but let's revert and reapply
+    for clarity.
+    
+    Signed-off-by: Sam James <sam@gentoo.org>
+
+diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
+index 79856d498a..3e26f112d6 100644
+--- a/sysdeps/i386/i586/memcpy.S
++++ b/sysdeps/i386/i586/memcpy.S
+@@ -26,7 +26,7 @@
+ #define LEN	SRC+4
+ 
+         .text
+-#if defined SHARED && IS_IN (libc)
++#if defined PIC && IS_IN (libc)
+ ENTRY (__memcpy_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
+commit 8323a83abd73446dc434aceff66219712c09140b
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:02 2024 +0000
+
+    i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk
+    
+    /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk':
+    /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here
+    
+    After this change, the static library built for i586, regardless of PIC
+    options, contains implementations of these functions respectively from
+    sysdeps/i386/memcpy_chk.S and sysdeps/i386/mempcpy_chk.S.  This ensures
+    that memcpy and mempcpy won't pull in __chk_fail and the routines it
+    calls.
+    
+    Reported-by: Florian Weimer <fweimer@redhat.com>
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit 0fdf4ba48ccce5abf567340b0ab8fa8ed8a9bc6e)
+
+diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
+index 3e26f112d6..79856d498a 100644
+--- a/sysdeps/i386/i586/memcpy.S
++++ b/sysdeps/i386/i586/memcpy.S
+@@ -26,7 +26,7 @@
+ #define LEN	SRC+4
+ 
+         .text
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY (__memcpy_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
+commit 8b005d7869debac4d5cd67f65e49a0fad89da9ad
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:03 2024 +0000
+
+    i686: Fix multiple definitions of __memmove_chk and __memset_chk
+    
+    Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and
+    mempcpy.S, but omitted memmove.S and memset.S.  As a result, the static
+    library built as PIC, whether with or without multiarch support,
+    contains two definitions for each of the __memmove_chk and __memset_chk
+    symbols.
+    
+    /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk':
+    /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here
+    
+    After this change, regardless of PIC options, the static library, built
+    for i686 with multiarch contains implementations of these functions
+    respectively from debug/memmove_chk.c and debug/memset_chk.c, and
+    without multiarch contains implementations of these functions
+    respectively from sysdeps/i386/memmove_chk.S and
+    sysdeps/i386/memset_chk.S.  This ensures that memmove and memset won't
+    pull in __chk_fail and the routines it calls.
+    
+    Reported-by: Sam James <sam@gentoo.org>
+    Tested-by: Sam James <sam@gentoo.org>
+    Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch")
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9)
+
+diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
+index f230359ad6..effd958120 100644
+--- a/sysdeps/i386/i686/memmove.S
++++ b/sysdeps/i386/i686/memmove.S
+@@ -29,7 +29,7 @@
+ #define SRC	DEST+4
+ #define LEN	SRC+4
+ 
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY_CHK (__memmove_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
+index f02f5a6df7..ab06771ea0 100644
+--- a/sysdeps/i386/i686/memset.S
++++ b/sysdeps/i386/i686/memset.S
+@@ -27,7 +27,7 @@
+ #define LEN	CHR+4
+ 
+         .text
+-#if defined PIC && IS_IN (libc)
++#if defined SHARED && IS_IN (libc)
+ ENTRY_CHK (__memset_chk)
+ 	movl	12(%esp), %eax
+ 	cmpl	%eax, 16(%esp)
+
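
The two reapplied fixes above turn on the difference between glibc's PIC and SHARED preprocessor conditions: PIC holds for any object compiled as position-independent code, including objects that go into a PIC-built static libc.a, whereas SHARED holds only while building the shared library. The guard pattern can be sketched with hypothetical names (this is not glibc source; BUILDING_SHARED and my_func_chk are invented for illustration):

/* Hypothetical stand-ins: BUILDING_SHARED plays the role of glibc's SHARED
   macro, my_func/my_func_chk stand in for memcpy/__memcpy_chk.  Guarding the
   extra checking entry point on the broader "compiled as PIC" condition also
   emits it into objects that land in a PIC-built libc.a, clashing with the
   archive's separate my_func_chk.o; the narrower guard keeps it in the
   shared library only.  */

#ifdef BUILDING_SHARED
int
my_func_chk (int x)
{
  return x;   /* extra entry point, shared library only */
}
#endif

int
my_func (int x)
{
  return x;
}

Building with -DBUILDING_SHARED pulls in the extra entry point; a static-archive build without it leaves that symbol to its standalone object file, which is what these commits restore for __memcpy_chk, __mempcpy_chk, __memmove_chk and __memset_chk.
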
+commit f8e462342189525e4605cf233b8f798d1c7f398d
+Author: Gabi Falk <gabifalk@gmx.com>
+Date:   Tue Apr 30 20:05:04 2024 +0000
+
+    Add a test to check for duplicate definitions in the static library
+    
+    This change follows two previous fixes addressing multiple definitions
+    of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk
+    and __memset_chk functions on i686.  The test is intended to prevent
+    such issues from occurring in the future.
+    
+    Signed-off-by: Gabi Falk <gabifalk@gmx.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
+    (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314)
+
+diff --git a/Makefile b/Makefile
+index 7052b46df8..2e351c0321 100644
+--- a/Makefile
++++ b/Makefile
+@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
+ 	$(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
+ 	$(evaluate-test)
+ 
++# Link libc.a as a whole to verify that it does not contain multiple
++# definitions of any symbols.
++tests-special += $(objpfx)link-static-libc.out
++$(objpfx)link-static-libc.out:
++	$(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
++	$(evaluate-test)
++
+ # Print test summary for tests in $1 .sum file;
+ # $2 is optional test identifier.
+ # Fail if there are unexpected failures in the test results.