commit 6d1e3fb07b45e2e31e469b16cf21b24bccf8914c Author: Andreas K. Hüttel Date: Wed Jan 31 02:12:43 2024 +0100 Replace advisories directory Signed-off-by: Andreas K. Hüttel diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001 deleted file mode 100644 index 3d19c91b6a..0000000000 --- a/advisories/GLIBC-SA-2023-0001 +++ /dev/null @@ -1,14 +0,0 @@ -printf: incorrect output for integers with thousands separator and width field - -When the printf family of functions is called with a format specifier -that uses an (enable grouping) and a minimum width -specifier, the resulting output could be larger than reasonably expected -by a caller that computed a tight bound on the buffer size. The -resulting larger than expected output could result in a buffer overflow -in the printf family of functions. - -CVE-Id: CVE-2023-25139 -Public-Date: 2023-02-02 -Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37) -Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38) -Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4) diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002 deleted file mode 100644 index 5122669a64..0000000000 --- a/advisories/GLIBC-SA-2023-0002 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Stack read overflow in no-aaaa mode - -If the system is configured in no-aaaa mode via /etc/resolv.conf, -getaddrinfo is called for the AF_UNSPEC address family, and a DNS -response is received over TCP that is larger than 2048 bytes, -getaddrinfo may potentially disclose stack contents via the returned -address data, or crash. 
- -CVE-Id: CVE-2023-4527 -Public-Date: 2023-09-12 -Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36) -Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39) -Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113) -Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38) -Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19) diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003 deleted file mode 100644 index d3aef80348..0000000000 --- a/advisories/GLIBC-SA-2023-0003 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Potential use-after-free - -When an NSS plugin only implements the _gethostbyname2_r and -_getcanonname_r callbacks, getaddrinfo could use memory that was freed -during buffer resizing, potentially causing a crash or read or write to -arbitrary memory. - -CVE-Id: CVE-2023-4806 -Public-Date: 2023-09-12 -Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39) -Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004 deleted file mode 100644 index 5286a7aa54..0000000000 --- a/advisories/GLIBC-SA-2023-0004 +++ /dev/null @@ -1,16 +0,0 @@ -tunables: local privilege escalation through buffer overflow - -If a tunable of the form NAME=NAME=VAL is passed in the environment of a -setuid program and NAME is valid, it may result in a buffer overflow, -which could be exploited to achieve escalated privileges. This flaw was -introduced in glibc 2.34. 
- -CVE-Id: CVE-2023-4911 -Public-Date: 2023-10-03 -Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34) -Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39) -Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423) -Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274) -Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118) -Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45) -Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27) diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005 deleted file mode 100644 index cc4eb90b82..0000000000 --- a/advisories/GLIBC-SA-2023-0005 +++ /dev/null @@ -1,18 +0,0 @@ -getaddrinfo: DoS due to memory leak - -The fix for CVE-2023-4806 introduced a memory leak when an application -calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED -flags set. - -CVE-Id: CVE-2023-5156 -Public-Date: 2023-09-25 -Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) -Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421) -Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272) -Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116) -Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42) -Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24) diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001 deleted file mode 100644 index 28931c75ae..0000000000 --- a/advisories/GLIBC-SA-2024-0001 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER -containing a long program name failed to update 
the required buffer -size, leading to the allocation and overflow of a too-small buffer on -the heap. - -CVE-Id: CVE-2023-6246 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39) -Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42) -Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126) diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002 deleted file mode 100644 index 940bfcf2fc..0000000000 --- a/advisories/GLIBC-SA-2024-0002 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal used the return value of snprintf/vsnprintf to -calculate buffer sizes for memory allocation. If these functions (for -any reason) failed and returned -1, the resulting buffer would be too -small to hold output. - -CVE-Id: CVE-2023-6779 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39) -Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43) -Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127) diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003 deleted file mode 100644 index b43a5150ab..0000000000 --- a/advisories/GLIBC-SA-2024-0003 +++ /dev/null @@ -1,13 +0,0 @@ -syslog: Integer overflow in __vsyslog_internal - -__vsyslog_internal calculated a buffer size by adding two integers, but -did not first check if the addition would overflow. 
- -CVE-Id: CVE-2023-6780 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39) -Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44) -Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128) diff --git a/advisories/README b/advisories/README deleted file mode 100644 index 94e68b1350..0000000000 --- a/advisories/README +++ /dev/null @@ -1,73 +0,0 @@ -GNU C Library Security Advisory Format -====================================== - -Security advisories in this directory follow a simple git commit log -format, with a heading and free-format description augmented with tags -to allow parsing key information. References to code changes are -specific to the glibc repository and follow a specific format: - - Tag-name: (release-version) - -The indicates a specific commit in the repository. The -release-version indicates the publicly consumable release in which this -commit is known to exist. The release-version is derived from the -git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and -is of the form 2.34-NNN. If the -NNN suffix is absent, it means that -the change is in that release tarball, otherwise the change is on the -release/2.YY/master branch and not in any released tarball. - -The following tags are currently being used: - -CVE-Id: -This is the CVE-Id assigned under the CVE Program -(https://www.cve.org/). - -Public-Date: -The date this issue became publicly known. - -Vulnerable-Commit: -The commit that introduced this vulnerability. There could be multiple -entries, one for each release branch in the glibc repository; the -release-version portion of this tag should tell you which branch this is -on. - -Fix-Commit: -The commit that fixed this vulnerability. 
There could be multiple -entries for each release branch in the glibc repository, indicating that -all of those commits contributed to fixing that issue in each of those -branches. - -Adding an Advisory ------------------- - -An advisory for a CVE needs to be added on the master branch in two steps: - -1. Add the text of the advisory without any Fix-Commit tags along with - the fix for the CVE. Add the Vulnerable-Commit tag, if applicable. - The advisories directory does not exist in release branches, so keep - the advisory text commit distinct from the code changes, to ease - backports. Ask for the GLIBC-SA advisory number from the security - team. - -2. Finish all backports on release branches and then back on the msater - branch, add all commit refs to the advisory using the Fix-Commit - tags. Don't bother adding the release-version subscript since the - next step will overwrite it. - -3. Run the process-advisories.sh script in the scripts directory on the - advisory: - - scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN - - (replace YYYY-NNNN with the actual advisory number). - -4. Verify the updated advisory and push the result. - -Getting a NEWS snippet from advisories --------------------------------------- - -Run: - - scripts/process-advisories.sh news - -and copy the content into the NEWS file. commit 63295e4fda1f6dab4bf7442706fe303bf283036c Author: Adhemerval Zanella Date: Mon Feb 5 16:10:24 2024 +0000 arm: Remove wrong ldr from _dl_start_user (BZ 31339) The commit 49d877a80b29d3002887b084eec6676d9f5fec18 (arm: Remove _dl_skip_args usage) removed the _SKIP_ARGS literal, which was previously loaded into r4 on loader _start. However, the cleanup did not remove the following 'ldr r4, [sl, r4]' on _dl_start_user, used to check to skip the arguments after ld self-relocations. In my testing, the kernel initially set r4 to 0, which makes the ldr instruction just read the _GLOBAL_OFFSET_TABLE_.
However, since r4 is a callee-saved register, a different runtime might not zero initialize it and thus trigger an invalid memory access. Checked on arm-linux-gnu. Reported-by: Adrian Ratiu Reviewed-by: Szabolcs Nagy (cherry picked from commit 1e25112dc0cb2515d27d8d178b1ecce778a9d37a) diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h index b857bbc868..dd1a0f6b6e 100644 --- a/sysdeps/arm/dl-machine.h +++ b/sysdeps/arm/dl-machine.h @@ -139,7 +139,6 @@ _start:\n\ _dl_start_user:\n\ adr r6, .L_GET_GOT\n\ add sl, sl, r6\n\ - ldr r4, [sl, r4]\n\ @ save the entry point in another register\n\ mov r6, r0\n\ @ get the original arg count\n\ commit 312e159626b67fe11f39e83e222cf4348a3962f3 Author: Adhemerval Zanella Date: Thu Feb 1 14:29:53 2024 -0300 mips: Fix clone3 implementation (BZ 31325) For o32 we need to setup a minimal stack frame to allow cprestore on __thread_start_clone3 (which instructs the linker to save the gp for PIC). Also, there is no guarantee by kABI that $8 will be preserved after syscall execution, so we need to save it on the provided stack. Checked on mipsel-linux-gnu. Reported-by: Khem Raj Tested-by: Khem Raj (cherry picked from commit bbd248ac0d75efdef8fe61ea69b1fb25fb95b6e7) diff --git a/sysdeps/unix/sysv/linux/mips/clone3.S b/sysdeps/unix/sysv/linux/mips/clone3.S index e9fec2fa47..481b8ae963 100644 --- a/sysdeps/unix/sysv/linux/mips/clone3.S +++ b/sysdeps/unix/sysv/linux/mips/clone3.S @@ -37,11 +37,6 @@ .text .set nomips16 -#if _MIPS_SIM == _ABIO32 -# define EXTRA_LOCALS 1 -#else -# define EXTRA_LOCALS 0 -#endif #define FRAMESZ ((NARGSAVE*SZREG)+ALSZ)&ALMASK GPOFF= FRAMESZ-(1*SZREG) NESTED(__clone3, SZREG, sp) @@ -68,8 +63,31 @@ NESTED(__clone3, SZREG, sp) beqz a0, L(error) /* No NULL cl_args pointer. */ beqz a2, L(error) /* No NULL function pointer. */ +#if _MIPS_SIM == _ABIO32 + /* Both stack and stack_size on clone_args are defined as uint64_t, and + there is no need to handle values larger than to 32 bits for o32.
*/ +# if __BYTE_ORDER == __BIG_ENDIAN +# define CL_STACKPOINTER_OFFSET 44 +# define CL_STACKSIZE_OFFSET 52 +# else +# define CL_STACKPOINTER_OFFSET 40 +# define CL_STACKSIZE_OFFSET 48 +# endif + + /* For o32 we need to setup a minimal stack frame to allow cprestore + on __thread_start_clone3. Also there is no guarantee by kABI that + $8 will be preserved after syscall execution (so we need to save it + on the provided stack). */ + lw t0, CL_STACKPOINTER_OFFSET(a0) /* Load the stack pointer. */ + lw t1, CL_STACKSIZE_OFFSET(a0) /* Load the stack_size. */ + addiu t1, -32 /* Update the stack size. */ + addu t2, t1, t0 /* Calculate the thread stack. */ + sw a3, 0(t2) /* Save argument pointer. */ + sw t1, CL_STACKSIZE_OFFSET(a0) /* Save the new stack size. */ +#else move $8, a3 /* a3 is set to 0/1 for syscall success/error while a4/$8 is returned unmodified. */ +#endif /* Do the system call, the kernel expects: v0: system call number @@ -125,7 +143,11 @@ L(thread_start_clone3): /* Restore the arg for user's function. */ move t9, a2 /* Function pointer. */ +#if _MIPS_SIM == _ABIO32 + PTR_L a0, 0(sp) +#else move a0, $8 /* Argument pointer. */ +#endif /* Call the user's function. */ jal t9 commit d0724994de40934c552f1f68de89053848a44927 Author: Xi Ruoyao Date: Thu Feb 22 21:26:55 2024 +0100 math: Update mips64 ulps Signed-off-by: Andreas K. 
Hüttel (cherry picked from commit e2a65ecc4b30a797df7dc6529f09b712aa256029) diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps index 78969745b2..933aba4735 100644 --- a/sysdeps/mips/mips64/libm-test-ulps +++ b/sysdeps/mips/mips64/libm-test-ulps @@ -1066,17 +1066,17 @@ double: 1 ldouble: 1 Function: "j0": -double: 2 +double: 3 float: 9 ldouble: 2 Function: "j0_downward": -double: 5 +double: 6 float: 9 ldouble: 9 Function: "j0_towardzero": -double: 6 +double: 7 float: 9 ldouble: 9 @@ -1146,6 +1146,7 @@ float: 6 ldouble: 8 Function: "log": +double: 1 float: 1 ldouble: 1 commit e0910f1d3278f05439fb434ee528fc9be1b6bd5e Author: Stefan Liebler Date: Thu Feb 22 15:03:27 2024 +0100 S390: Do not clobber r7 in clone [BZ #31402] Starting with commit e57d8fc97b90127de4ed3e3a9cdf663667580935 "S390: Always use svc 0" clone clobbers the call-saved register r7 in error case: function or stack is NULL. This patch restores the saved registers also in the error case. Furthermore the existing test misc/tst-clone is extended to check all error cases and that clone does not clobber registers in this error case. (cherry picked from commit 02782fd12849b6673cb5c2728cb750e8ec295aa3) diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S index 4c882ef2ee..a7a863242c 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S +++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S @@ -53,6 +53,7 @@ ENTRY(__clone) br %r14 error: lhi %r2,-EINVAL + lm %r6,%r7,24(%r15) /* Load registers. */ j SYSCALL_ERROR_LABEL PSEUDO_END (__clone) diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S index 4eb104be71..c552a6b8de 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S +++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S @@ -54,6 +54,7 @@ ENTRY(__clone) br %r14 error: lghi %r2,-EINVAL + lmg %r6,%r7,48(%r15) /* Restore registers. 
*/ jg SYSCALL_ERROR_LABEL PSEUDO_END (__clone) diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c index 470676ab2b..2bc7124983 100644 --- a/sysdeps/unix/sysv/linux/tst-clone.c +++ b/sysdeps/unix/sysv/linux/tst-clone.c @@ -16,12 +16,16 @@ License along with the GNU C Library; if not, see . */ -/* BZ #2386 */ +/* BZ #2386, BZ #31402 */ #include #include #include #include #include +#include /* For _STACK_GROWS_{UP,DOWN}. */ +#include + +volatile unsigned v = 0xdeadbeef; int child_fn(void *arg) { @@ -30,22 +34,67 @@ int child_fn(void *arg) } static int -do_test (void) +__attribute__((noinline)) +do_clone (int (*fn)(void *), void *stack) { int result; + unsigned int a = v; + unsigned int b = v; + unsigned int c = v; + unsigned int d = v; + unsigned int e = v; + unsigned int f = v; + unsigned int g = v; + unsigned int h = v; + unsigned int i = v; + unsigned int j = v; + unsigned int k = v; + unsigned int l = v; + unsigned int m = v; + unsigned int n = v; + unsigned int o = v; + + result = clone (fn, stack, 0, NULL); + + /* Check that clone does not clobber call-saved registers. 
*/ + TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v + && g == v && h == v && i == v && j == v && k == v && l == v + && m == v && n == v && o == v); + + return result; +} + +static void +__attribute__((noinline)) +do_test_single (int (*fn)(void *), void *stack) +{ + printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack); + errno = 0; + + int result = do_clone (fn, stack); + + TEST_COMPARE (errno, EINVAL); + TEST_COMPARE (result, -1); +} - result = clone (child_fn, NULL, 0, NULL); +static int +do_test (void) +{ + char st[128 * 1024] __attribute__ ((aligned)); + void *stack = NULL; +#if _STACK_GROWS_DOWN + stack = st + sizeof (st); +#elif _STACK_GROWS_UP + stack = st; +#else +# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" +#endif - if (errno != EINVAL || result != -1) - { - printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n", - result, errno, EINVAL); - return 1; - } + do_test_single (child_fn, NULL); + do_test_single (NULL, stack); + do_test_single (NULL, NULL); - puts ("All OK"); return 0; } -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include commit 1b9c1a0047fb26a65a9b2a7b8cd977243f7d353c Author: Jakub Jelinek Date: Wed Jan 31 19:17:27 2024 +0100 Use gcc __builtin_stdc_* builtins in stdbit.h if possible The following patch uses the GCC 14 __builtin_stdc_* builtins in stdbit.h for the type-generic macros, so that when compiled with GCC 14 or later, it supports not just 8/16/32/64-bit unsigned integers, but also 128-bit (if target supports them) and unsigned _BitInt (any supported precision). And so that the macros don't expand arguments multiple times and can be evaluated in constant expressions. The new testcase is gcc's gcc/testsuite/gcc.dg/builtin-stdc-bit-1.c adjusted to test stdbit.h and the type-generic macros in there instead of the builtins and adjusted to use glibc test framework rather than gcc style tests with __builtin_abort (). 
Signed-off-by: Jakub Jelinek Reviewed-by: Joseph Myers (cherry picked from commit da89496337b97e6a2aaf1e81d55cf998f6db1070) diff --git a/manual/stdbit.texi b/manual/stdbit.texi index fe41c671d8..6c75ed9a20 100644 --- a/manual/stdbit.texi +++ b/manual/stdbit.texi @@ -32,7 +32,13 @@ and @code{unsigned long long int}. In addition, there is a corresponding type-generic macro (not listed below), named the same as the functions but without any suffix such as @samp{_uc}. The type-generic macro can only be used with an argument of an unsigned -integer type with a width of 8, 16, 32 or 64 bits. +integer type with a width of 8, 16, 32 or 64 bits, or when using +a compiler with support for +@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}}, +etc.@:, built-in functions such as GCC 14.1 or later +any unsigned integer type those built-in functions support. +In GCC 14.1 that includes support for @code{unsigned __int128} and +@code{unsigned _BitInt(@var{n})} if supported by the target. 
@deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x}) @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x}) diff --git a/stdlib/Makefile b/stdlib/Makefile index d587f054d1..9898cc5d8a 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -308,6 +308,7 @@ tests := \ tst-setcontext10 \ tst-setcontext11 \ tst-stdbit-Wconversion \ + tst-stdbit-builtins \ tst-stdc_bit_ceil \ tst-stdc_bit_floor \ tst-stdc_bit_width \ diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h index f334eb174d..2801590c63 100644 --- a/stdlib/stdbit.h +++ b/stdlib/stdbit.h @@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_leading_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_leading_zeros) +# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x)) +#else +# define stdc_leading_zeros(x) \ (stdc_leading_zeros_ull (x) \ - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_leading_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_leading_ones(x) \ +#if __glibc_has_builtin (__builtin_stdc_leading_ones) +# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x)) +#else +# define stdc_leading_ones(x) \ (stdc_leading_ones_ull ((unsigned long long int) (x) \ << 8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define 
stdc_trailing_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_trailing_zeros) +# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x)) +#else +# define stdc_trailing_zeros(x) \ (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x) \ : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x) \ : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x)) \ : stdc_trailing_zeros_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_trailing_ones) +# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x)) +#else +# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_leading_zero(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_leading_zero) +# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x)) +#else +# define stdc_first_leading_zero(x) \ (sizeof (x) == 8 ? stdc_first_leading_zero_ull (x) \ : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x) \ : sizeof (x) == 2 ? 
stdc_first_leading_zero_us (__pacify_uint16 (x)) \ : stdc_first_leading_zero_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_leading_one(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_leading_one) +# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x)) +#else +# define stdc_first_leading_one(x) \ (sizeof (x) == 8 ? stdc_first_leading_one_ull (x) \ : sizeof (x) == 4 ? stdc_first_leading_one_ui (x) \ : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x)) \ : stdc_first_leading_one_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_trailing_zero(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero) +# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x)) +#else +# define stdc_first_trailing_zero(x) \ (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x) \ : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x) \ : sizeof (x) == 2 ? 
stdc_first_trailing_zero_us (__pacify_uint16 (x)) \ : stdc_first_trailing_zero_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x) __extension__ extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_first_trailing_one(x) \ +#if __glibc_has_builtin (__builtin_stdc_first_trailing_one) +# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x)) +#else +# define stdc_first_trailing_one(x) \ (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x) \ : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x) \ : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x)) \ : stdc_first_trailing_one_uc (__pacify_uint8 (x))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) static __always_inline unsigned int @@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x) __extension__ extern unsigned int stdc_count_zeros_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_count_zeros(x) \ +#if __glibc_has_builtin (__builtin_stdc_count_zeros) +# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x)) +#else +# define stdc_count_zeros(x) \ (stdc_count_zeros_ull (x) \ - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) static __always_inline unsigned int @@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x) __extension__ extern unsigned int stdc_count_ones_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_count_ones(x) (stdc_count_ones_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_count_ones) +# define stdc_count_ones(x) (__builtin_stdc_count_ones (x)) +#else +# define stdc_count_ones(x) (stdc_count_ones_ull (x)) 
+#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) static __always_inline unsigned int @@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x) __extension__ extern bool stdc_has_single_bit_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_has_single_bit(x) \ +#if __glibc_has_builtin (__builtin_stdc_has_single_bit) +# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x)) +#else +# define stdc_has_single_bit(x) \ ((bool) (sizeof (x) <= sizeof (unsigned int) \ ? stdc_has_single_bit_ui (x) \ : stdc_has_single_bit_ull (x))) +#endif static __always_inline bool __hsb64_inline (uint64_t __x) @@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x) __extension__ extern unsigned int stdc_bit_width_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_bit_width(x) (stdc_bit_width_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_width) +# define stdc_bit_width(x) (__builtin_stdc_bit_width (x)) +#else +# define stdc_bit_width(x) (stdc_bit_width_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline unsigned int @@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x) __extension__ extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x) __THROW __attribute_const__; -#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_floor) +# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x)) +#else +# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline uint64_t @@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x) __extension__ extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x) __THROW __attribute_const__; -#define 
stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) +#if __glibc_has_builtin (__builtin_stdc_bit_ceil) +# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x)) +#else +# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) +#endif #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) static __always_inline uint64_t diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c new file mode 100644 index 0000000000..536841ca8a --- /dev/null +++ b/stdlib/tst-stdbit-builtins.c @@ -0,0 +1,778 @@ +/* Test type-generic macros with compiler __builtin_stdc_* support. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include + +#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \ + && __glibc_has_builtin (__builtin_stdc_leading_ones) \ + && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \ + && __glibc_has_builtin (__builtin_stdc_trailing_ones) \ + && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \ + && __glibc_has_builtin (__builtin_stdc_first_leading_one) \ + && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \ + && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \ + && __glibc_has_builtin (__builtin_stdc_count_zeros) \ + && __glibc_has_builtin (__builtin_stdc_count_ones) \ + && __glibc_has_builtin (__builtin_stdc_has_single_bit) \ + && __glibc_has_builtin (__builtin_stdc_bit_width) \ + && __glibc_has_builtin (__builtin_stdc_bit_floor) \ + && __glibc_has_builtin (__builtin_stdc_bit_ceil) + +# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__) +# define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ +# endif + +typedef unsigned char uc; +typedef unsigned short us; +typedef unsigned int ui; +typedef unsigned long int ul; +typedef unsigned long long int ull; + +# define expr_has_type(e, t) _Generic (e, default : 0, t : 1) + +static int +do_test (void) +{ + TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1); + TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0); + TEST_COMPARE (stdc_leading_zeros ((us) 
~0U), 0); + TEST_COMPARE (stdc_leading_zeros (~0U), 0); + TEST_COMPARE (stdc_leading_zeros (~0UL), 0); + TEST_COMPARE (stdc_leading_zeros (~0ULL), 0); + TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2); + TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4); + TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8); + TEST_COMPARE (stdc_leading_zeros (512ULL), + sizeof (long long int) * CHAR_BIT - 10); + TEST_COMPARE (stdc_leading_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_leading_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_leading_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1); + TEST_COMPARE (stdc_leading_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1); + TEST_COMPARE (stdc_leading_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2); + TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4); + TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8); + TEST_COMPARE (stdc_leading_ones (~512ULL), + sizeof (long long int) * CHAR_BIT - 10); + TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1); + TEST_COMPARE (stdc_trailing_zeros ((us) 0), sizeof 
(short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0); + TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0); + TEST_COMPARE (stdc_trailing_zeros (~0U), 0); + TEST_COMPARE (stdc_trailing_zeros (~0UL), 0); + TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0); + TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1); + TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3); + TEST_COMPARE (stdc_trailing_zeros (32U), 5); + TEST_COMPARE (stdc_trailing_zeros (128UL), 7); + TEST_COMPARE (stdc_trailing_zeros (512ULL), 9); + TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_trailing_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_ones ((uc) 5), 1); + TEST_COMPARE 
(stdc_trailing_ones ((us) 15), 4); + TEST_COMPARE (stdc_trailing_ones (127U), 7); + TEST_COMPARE (stdc_trailing_ones (511UL), 9); + TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2), + sizeof (long long int) * CHAR_BIT - 2); + TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0U), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0UL), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0ULL), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0); + TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0); + TEST_COMPARE (stdc_first_leading_zero (~0U), 0); + TEST_COMPARE (stdc_first_leading_zero (~0UL), 0); + TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0); + TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1); + TEST_COMPARE (stdc_first_leading_zero ((us) ~15U), + sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5); + TEST_COMPARE (stdc_first_leading_zero (~255UL), + sizeof (long int) * CHAR_BIT - 7); + TEST_COMPARE (stdc_first_leading_zero (~1023ULL), + sizeof (long long int) * CHAR_BIT - 9); + TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0U), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one 
(0UL), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1); + TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1); + TEST_COMPARE (stdc_first_leading_one (~0U), 1); + TEST_COMPARE (stdc_first_leading_one (~0UL), 1); + TEST_COMPARE (stdc_first_leading_one (~0ULL), 1); + TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1); + TEST_COMPARE (stdc_first_leading_one ((us) 9), + sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5); + TEST_COMPARE (stdc_first_leading_one (130UL), + sizeof (long int) * CHAR_BIT - 7); + TEST_COMPARE (stdc_first_leading_one (512ULL), + sizeof (long long int) * CHAR_BIT - 9); + TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0U), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0UL), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0U), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0); + TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0); + TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1); + TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5); + TEST_COMPARE (stdc_first_trailing_zero (63U), 7); + TEST_COMPARE (stdc_first_trailing_zero (128UL), 1); + TEST_COMPARE (stdc_first_trailing_zero (511ULL), 10); + TEST_COMPARE 
(stdc_first_trailing_one ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0U), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1); + TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1); + TEST_COMPARE (stdc_first_trailing_one (~0U), 1); + TEST_COMPARE (stdc_first_trailing_one (~0UL), 1); + TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1); + TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3); + TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6); + TEST_COMPARE (stdc_first_trailing_one (127U), 1); + TEST_COMPARE (stdc_first_trailing_one (511UL), 1); + TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13); + TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1); + TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1); + TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1); + TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1); + TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1); + TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0); + TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0); + TEST_COMPARE (stdc_count_zeros (~0U), 0); + TEST_COMPARE (stdc_count_zeros (~0UL), 0); 
+ TEST_COMPARE (stdc_count_zeros (~0ULL), 0); + TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1); + TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4); + TEST_COMPARE (stdc_count_zeros (~1315UL), 5); + TEST_COMPARE (stdc_count_zeros (3363ULL), + sizeof (long long int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_count_ones ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1); + TEST_COMPARE (stdc_count_ones ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1); + TEST_COMPARE (stdc_count_ones (0U), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1); + TEST_COMPARE (stdc_count_ones (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1); + TEST_COMPARE (stdc_count_ones (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1); + TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1); + TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3); + TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4); + TEST_COMPARE (stdc_count_ones (1315UL), 5); + TEST_COMPARE (stdc_count_ones (~3363ULL), + sizeof (long long int) * CHAR_BIT - 6); + TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (0U), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0U), _Bool), 1); + TEST_COMPARE 
(stdc_has_single_bit (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1); + TEST_COMPARE (stdc_has_single_bit ((us) 8), 1); + TEST_COMPARE (stdc_has_single_bit (32U), 1); + TEST_COMPARE (stdc_has_single_bit (128UL), 1); + TEST_COMPARE (stdc_has_single_bit (512ULL), 1); + TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0); + TEST_COMPARE (stdc_has_single_bit ((us) 96), 0); + TEST_COMPARE (stdc_has_single_bit (513U), 0); + TEST_COMPARE (stdc_has_single_bit (1022UL), 0); + TEST_COMPARE (stdc_has_single_bit (12ULL), 0); + TEST_COMPARE (stdc_bit_width ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1); + TEST_COMPARE (stdc_bit_width ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1); + TEST_COMPARE (stdc_bit_width (0U), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1); + TEST_COMPARE (stdc_bit_width (0UL), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1); + TEST_COMPARE (stdc_bit_width (0ULL), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1); + TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT); + TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT); + TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1); + TEST_COMPARE (stdc_bit_width ((uc) 6), 3); + TEST_COMPARE (stdc_bit_width ((us) 12U), 4); + TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)), + sizeof (short) * CHAR_BIT - 5); + TEST_COMPARE (stdc_bit_width (137U), 8); + TEST_COMPARE (stdc_bit_width (269U), 9); + TEST_COMPARE (stdc_bit_width (39UL), 6); + TEST_COMPARE (stdc_bit_width (~0UL 
>> 2), sizeof (long int) * CHAR_BIT - 2); + TEST_COMPARE (stdc_bit_width (1023ULL), 10); + TEST_COMPARE (stdc_bit_width (1024ULL), 11); + TEST_COMPARE (stdc_bit_floor ((uc) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1); + TEST_COMPARE (stdc_bit_floor ((us) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1); + TEST_COMPARE (stdc_bit_floor (0U), 0U); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1); + TEST_COMPARE (stdc_bit_floor (0UL), 0UL); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1); + TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1); + TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor ((us) ~0U), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0UL), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor (~0ULL), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_floor ((uc) 4), 4); + TEST_COMPARE (stdc_bit_floor ((uc) 7), 4); + TEST_COMPARE (stdc_bit_floor ((us) 8U), 8); + TEST_COMPARE (stdc_bit_floor ((us) 31U), 16); + TEST_COMPARE (stdc_bit_floor (137U), 128U); + TEST_COMPARE (stdc_bit_floor (269U), 256U); + TEST_COMPARE (stdc_bit_floor (511UL), 256UL); + TEST_COMPARE (stdc_bit_floor (512UL), 512UL); + TEST_COMPARE (stdc_bit_floor (513UL), 512ULL); + TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL); + TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1); + TEST_COMPARE (stdc_bit_ceil ((us) 0), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1); + TEST_COMPARE (stdc_bit_ceil (0U), 1U); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1); + TEST_COMPARE (stdc_bit_ceil (0UL), 1UL); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1); + TEST_COMPARE 
(stdc_bit_ceil (0ULL), 1ULL); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1); + TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0); + TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0); + TEST_COMPARE (stdc_bit_ceil (~0U), 0U); + TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL); + TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL); + TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), + (1U << (sizeof (short) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0U >> 1), + (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)), + (1U << (sizeof (int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), + (1UL << (sizeof (long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (1ULL + << (sizeof (long long int) * CHAR_BIT - 1)), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1), + (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); + TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1); + TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2); + TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4); + TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4); + TEST_COMPARE (stdc_bit_ceil (5U), 8U); + TEST_COMPARE (stdc_bit_ceil (269U), 512U); + TEST_COMPARE (stdc_bit_ceil (511UL), 512UL); + TEST_COMPARE (stdc_bit_ceil (512UL), 512UL); + TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL); + TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL); +# ifdef __SIZEOF_INT128__ + TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_leading_zeros (~(unsigned 
__int128) 0), 0); + TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1); + TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_count_ones ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type 
(stdc_count_ones ((unsigned __int128) 0), ui), + 1); + TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0); + TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1); + TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0), + sizeof (__int128) * CHAR_BIT); + TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0), + unsigned __int128), 1); + TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0) + != ((unsigned __int128) 1) << (sizeof (__int128) + * CHAR_BIT - 1), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0), + unsigned __int128), 1); + TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0); + TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1) + != ((unsigned __int128) 1) << (sizeof (__int128) + * CHAR_BIT - 1), 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0); +# endif + uc a = 0; + TEST_COMPARE (stdc_bit_width (a++), 0); + TEST_COMPARE (a, 1); + ull b = 0; + TEST_COMPARE (stdc_bit_width (b++), 0); + TEST_COMPARE (b, 1); + TEST_COMPARE (stdc_bit_floor (a++), 1); + TEST_COMPARE (a, 2); + TEST_COMPARE (stdc_bit_floor (b++), 1); + TEST_COMPARE (b, 2); + TEST_COMPARE (stdc_bit_ceil (a++), 2); + TEST_COMPARE (a, 3); + TEST_COMPARE (stdc_bit_ceil (b++), 2); + TEST_COMPARE (b, 3); + TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2); + TEST_COMPARE (a, 4); + TEST_COMPARE (stdc_leading_zeros (b++), + sizeof (long long int) * CHAR_BIT - 2); + TEST_COMPARE (b, 4); + TEST_COMPARE (stdc_leading_ones (a++), 0); + TEST_COMPARE (a, 5); + 
TEST_COMPARE (stdc_leading_ones (b++), 0); + TEST_COMPARE (b, 5); + TEST_COMPARE (stdc_trailing_zeros (a++), 0); + TEST_COMPARE (a, 6); + TEST_COMPARE (stdc_trailing_zeros (b++), 0); + TEST_COMPARE (b, 6); + TEST_COMPARE (stdc_trailing_ones (a++), 0); + TEST_COMPARE (a, 7); + TEST_COMPARE (stdc_trailing_ones (b++), 0); + TEST_COMPARE (b, 7); + TEST_COMPARE (stdc_first_leading_zero (a++), 1); + TEST_COMPARE (a, 8); + TEST_COMPARE (stdc_first_leading_zero (b++), 1); + TEST_COMPARE (b, 8); + TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3); + TEST_COMPARE (a, 9); + TEST_COMPARE (stdc_first_leading_one (b++), + sizeof (long long int) * CHAR_BIT - 3); + TEST_COMPARE (b, 9); + TEST_COMPARE (stdc_first_trailing_zero (a++), 2); + TEST_COMPARE (a, 10); + TEST_COMPARE (stdc_first_trailing_zero (b++), 2); + TEST_COMPARE (b, 10); + TEST_COMPARE (stdc_first_trailing_one (a++), 2); + TEST_COMPARE (a, 11); + TEST_COMPARE (stdc_first_trailing_one (b++), 2); + TEST_COMPARE (b, 11); + TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3); + TEST_COMPARE (a, 12); + TEST_COMPARE (stdc_count_zeros (b++), + sizeof (long long int) * CHAR_BIT - 3); + TEST_COMPARE (b, 12); + TEST_COMPARE (stdc_count_ones (a++), 2); + TEST_COMPARE (a, 13); + TEST_COMPARE (stdc_count_ones (b++), 2); + TEST_COMPARE (b, 13); + TEST_COMPARE (stdc_has_single_bit (a++), 0); + TEST_COMPARE (a, 14); + TEST_COMPARE (stdc_has_single_bit (b++), 0); + TEST_COMPARE (b, 14); +# ifdef BITINT_MAXWIDTH +# if BITINT_MAXWIDTH >= 64 + TEST_COMPARE (stdc_leading_zeros (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_leading_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_leading_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_leading_ones (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_leading_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (0uwb), 
1); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_trailing_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_trailing_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_trailing_ones (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_one (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1); + TEST_COMPARE (stdc_first_leading_one (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1); + TEST_COMPARE (stdc_count_zeros (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1); + TEST_COMPARE (stdc_count_zeros (1uwb), 0); + TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1); + TEST_COMPARE (stdc_count_ones (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1); + TEST_COMPARE (stdc_count_ones (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1); + TEST_COMPARE (stdc_has_single_bit (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1); + TEST_COMPARE (stdc_has_single_bit 
(1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1); + TEST_COMPARE (stdc_bit_width (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1); + TEST_COMPARE (stdc_bit_width (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1); + TEST_COMPARE (stdc_bit_floor (0uwb), 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_floor (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_ceil (0uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1); + TEST_COMPARE (stdc_bit_ceil (1uwb), 1); + TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1); + unsigned _BitInt(1) c = 0; + TEST_COMPARE (stdc_bit_floor (c++), 0); + TEST_COMPARE (c, 1); + TEST_COMPARE (stdc_bit_floor (c++), 1); + TEST_COMPARE (c, 0); + TEST_COMPARE (stdc_bit_ceil (c++), 1); + TEST_COMPARE (c, 1); + TEST_COMPARE (stdc_bit_ceil (c++), 1); + TEST_COMPARE (c, 0); +# endif +# if BITINT_MAXWIDTH >= 512 + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9); + TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10); + TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_leading_ones 
((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9); + TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8); + TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8); + TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2), + 373 - 2); + TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE 
(stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511), + 512 - 8); + TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023), + 373 - 9); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373)) + 0), ui), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8); + TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned + _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1); + TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned + _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9); + TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10); + TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512)) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373)) + 0), ui), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(512)) 0), 1); + TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1); + 
TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175), + 176); + TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311), + 312); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5); + TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6); + TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5); + TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6); + TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0), + _Bool), 1); + TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279), + 0); + TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0); + 
TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0), + ui), 1); + TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0); + TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0), + ui), 1); + TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512); + TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373); + TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405), + 405 + 10); + TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242), + 242 + 11); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0), + unsigned _BitInt(512)), 1); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0); + TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0), + unsigned _BitInt(373)), 1); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0) + != ((unsigned _BitInt(512)) 1) << (512 - 1), 0); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0) + != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405) + != (((unsigned _BitInt(512)) 256) << 405), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242) + != (((unsigned _BitInt(512)) 512) << 242), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0), + unsigned _BitInt(512)), 1); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0); + TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0), + unsigned _BitInt(373)), 1); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1)) + != ((unsigned _BitInt(512)) 1) 
<< (512 - 1), 0); + TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1) + != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405) + != (((unsigned _BitInt(512)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242) + != (((unsigned _BitInt(512)) 1024) << 242), 0); + TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, + 0); + TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) + != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH + - 1), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0); + TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0); + TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, + 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1) + << (BITINT_MAXWIDTH - 1)) + != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH + - 1), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); + TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513) + << 405) + != (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0); +# endif +# endif + return 0; +} +#else +static int +do_test (void) +{ + return 0; +} +#endif + +#include commit 71fcdba577884627c3ee4e43beb915da752efb1f Author: Florian Weimer Date: Fri Mar 15 19:08:24 2024 +0100 linux: Use rseq area unconditionally in sched_getcpu (bug 31479) Originally, nptl/descr.h included , but we removed that in commit 2c6b4b272e6b4d07303af25709051c3e96288f2d ("nptl: Unconditionally use a 32-byte rseq area"). 
After that, it was not ensured that the RSEQ_SIG macro was defined during sched_getcpu.c compilation that provided a definition. This commit always checks the rseq area for CPU number information before using the other approaches. This adds an unnecessary (but well-predictable) branch on architectures which do not define RSEQ_SIG, but its cost is small compared to the system call. Most architectures that have vDSO acceleration for getcpu also have rseq support. Fixes: 2c6b4b272e6b4d07303af25709051c3e96288f2d Fixes: 1d350aa06091211863e41169729cee1bca39f72f Reviewed-by: Arjun Shankar (cherry picked from commit 7a76f218677d149d8b7875b336722108239f7ee9) diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c index dfb884568d..72a3360550 100644 --- a/sysdeps/unix/sysv/linux/sched_getcpu.c +++ b/sysdeps/unix/sysv/linux/sched_getcpu.c @@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void) return r == -1 ? r : cpu; } -#ifdef RSEQ_SIG int sched_getcpu (void) { int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id); return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu (); } -#else /* RSEQ_SIG */ -int -sched_getcpu (void) -{ - return vsyscall_sched_getcpu (); -} -#endif /* RSEQ_SIG */ commit ee7f4c54e19738c2c27d3846e1e9b3595c89221f Author: Manjunath Matti Date: Tue Mar 19 15:29:48 2024 -0500 powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture. This patch adds a new feature for powerpc. In order to get faster access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for implementing __builtin_cpu_supports() in GCC) without the overhead of reading them from the auxiliary vector, we now reserve space for them in the TCB. 
Suggested-by: Peter Bergner Reviewed-by: Peter Bergner (cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a) diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c index 7345ebc4e5..aaf67b87e8 100644 --- a/elf/dl-diagnostics.c +++ b/elf/dl-diagnostics.c @@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ) _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap)); _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT); _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2)); + _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3)); + _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4)); _dl_diagnostics_print_labeled_string ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs); _dl_diagnostics_print_labeled_value diff --git a/elf/dl-support.c b/elf/dl-support.c index 2f502c8b0d..451932dd03 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr; size_t _dl_phnum; uint64_t _dl_hwcap; uint64_t _dl_hwcap2; +uint64_t _dl_hwcap3; +uint64_t _dl_hwcap4; enum dso_sort_algorithm _dl_dso_sort_algo; diff --git a/elf/elf.h b/elf/elf.h index 455731663c..1c394c64cd 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1234,6 +1234,10 @@ typedef struct #define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */ #define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */ +/* More machine-dependent hints about processor capabilities. */ +#define AT_HWCAP3 29 /* extension of AT_HWCAP. */ +#define AT_HWCAP4 30 /* extension of AT_HWCAP. */ + #define AT_EXECFN 31 /* Filename of executable. */ /* Pointer to the global system page used for system calls and other diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 117c901ccc..50f58a60e3 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -646,6 +646,8 @@ struct rtld_global_ro /* Mask for more hardware capabilities that are available on some platforms. 
*/ EXTERN uint64_t _dl_hwcap2; + EXTERN uint64_t _dl_hwcap3; + EXTERN uint64_t _dl_hwcap4; EXTERN enum dso_sort_algorithm _dl_dso_sort_algo; diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c index a76bb6e5b0..8cf00aa7e3 100644 --- a/sysdeps/powerpc/dl-procinfo.c +++ b/sysdeps/powerpc/dl-procinfo.c @@ -38,6 +38,10 @@ needed. */ +/* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. */ +#define _DL_HWCAP_COUNT 128 + #ifndef PROCINFO_CLASS # define PROCINFO_CLASS #endif @@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features #if !defined PROCINFO_DECL && defined SHARED ._dl_powerpc_cap_flags #else -PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] +PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15] #endif #ifndef PROCINFO_DECL = { diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h index 68f4241095..f8cb343877 100644 --- a/sysdeps/powerpc/dl-procinfo.h +++ b/sysdeps/powerpc/dl-procinfo.h @@ -22,16 +22,17 @@ #include #include /* This defines the PPC_FEATURE[2]_* macros. */ -/* The total number of available bits (including those prior to - _DL_HWCAP_FIRST). Some of these bits might not be used. */ -#define _DL_HWCAP_COUNT 64 +/* Feature masks are all 32-bits in size. */ +#define _DL_HWCAP_SIZE 32 -/* Features started at bit 31 and decremented as new features were added. */ -#define _DL_HWCAP_LAST 31 +/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */ +#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE -/* AT_HWCAP2 features started at bit 31 and decremented as new features were - added. HWCAP2 feature bits start at bit 0. */ -#define _DL_HWCAP2_LAST 31 +/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */ +#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE) + +/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. 
*/ +#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE) /* These bits influence library search. */ #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ @@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word) case AT_HWCAP: _dl_printf ("AT_HWCAP: "); - for (int i = 0; i <= _DL_HWCAP_LAST; ++i) + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) if (word & (1 << i)) _dl_printf (" %s", _dl_hwcap_string (i)); break; case AT_HWCAP2: { - unsigned int offset = _DL_HWCAP_LAST + 1; _dl_printf ("AT_HWCAP2: "); - /* We have to go through them all because the kernel added the - AT_HWCAP2 features starting with the high bits. */ - for (int i = 0; i <= _DL_HWCAP2_LAST; ++i) - if (word & (1 << i)) - _dl_printf (" %s", _dl_hwcap_string (offset + i)); + /* We have to go through them all because the kernel added the + AT_HWCAP2 features starting with the high bits. */ + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i)); + break; + } + case AT_HWCAP3: + { + _dl_printf ("AT_HWCAP3: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP3 features starting with the high bits. */ + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i)); + break; + } + case AT_HWCAP4: + { + _dl_printf ("AT_HWCAP4: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP4 features starting with the high bits. 
*/ + for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i)); break; } case AT_L1I_CACHEGEOMETRY: diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c index 76344f285a..f6fede15a7 100644 --- a/sysdeps/powerpc/hwcapinfo.c +++ b/sysdeps/powerpc/hwcapinfo.c @@ -31,7 +31,7 @@ void __tcb_parse_hwcap_and_convert_at_platform (void) { - uint64_t h1, h2; + uint64_t h1, h2, h3, h4; /* Read AT_PLATFORM string from auxv and convert it to a number. */ __tcb.at_platform = _dl_string_platform (GLRO (dl_platform)); @@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void) /* Read HWCAP and HWCAP2 from auxv. */ h1 = GLRO (dl_hwcap); h2 = GLRO (dl_hwcap2); + h3 = GLRO (dl_hwcap3); + h4 = GLRO (dl_hwcap4); /* hwcap contains only the latest supported ISA, the code checks which is and fills the previous supported ones. */ @@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void) else if (h1 & PPC_FEATURE_POWER5) h1 |= PPC_FEATURE_POWER4; - uint64_t array_hwcaps[] = { h1, h2 }; + uint64_t array_hwcaps[] = { h1, h2, h3, h4 }; init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps); /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that we can read both in a single load later. */ __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff); - __tcb.hwcap_extn = 0x0; + + /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that + we can read both in a single load later. 
*/ + __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff); } #if IS_IN (rtld) diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h index e3d758b163..ea2a58ecb1 100644 --- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h +++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h @@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; + GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3]; + GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4]; GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; _dl_random = (void *) auxv_values[AT_RANDOM]; diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c index ad3692d738..e1b14e9eb3 100644 --- a/sysdeps/unix/sysv/linux/dl-sysdep.c +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c @@ -197,6 +197,8 @@ _dl_show_auxv (void) [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex }, [AT_RANDOM - 2] = { "RANDOM: 0x", hex }, [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex }, + [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex }, + [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex }, [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec }, [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec }, [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex }, diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c index 8e8a5ec2ea..a947d62db6 100644 --- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c +++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c @@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[]) which are set by __tcb_parse_hwcap_and_convert_at_platform. 
*/ cpu_features->hwcap = hwcaps[0]; cpu_features->hwcap2 = hwcaps[1]; + cpu_features->hwcap3 = hwcaps[2]; + cpu_features->hwcap4 = hwcaps[3]; /* Default is to use aligned memory access on optimized function unless tunables is enable, since for this case user can explicit disable unaligned optimizations. */ diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h index 1294f0b601..e9eb6a13c8 100644 --- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h +++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h @@ -26,6 +26,8 @@ struct cpu_features bool use_cached_memopt; unsigned long int hwcap; unsigned long int hwcap2; + unsigned long int hwcap3; + unsigned long int hwcap4; }; static const char hwcap_names[] = { diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c index a4705daf1c..6a00cd88cd 100644 --- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c +++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c @@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv, case AT_HWCAP2: _dl_hwcap2 = (unsigned long int) av->a_un.a_val; break; + case AT_HWCAP3: + _dl_hwcap3 = (unsigned long int) av->a_un.a_val; + break; + case AT_HWCAP4: + _dl_hwcap4 = (unsigned long int) av->a_un.a_val; + break; case AT_PLATFORM: _dl_platform = (void *) av->a_un.a_val; break; commit aad45c8ac30aa1072e54903ce6aead22702f244a Author: Amrita H S Date: Tue Mar 19 19:08:47 2024 -0500 powerpc: Placeholder and infrastructure/build support to add Power11 related changes. The following three changes have been added to provide initial Power11 support. 1. Add the directories to hold Power11 files. 2. Add support to select Power11 libraries based on AT_PLATFORM. 3. Let submachine=power11 be set automatically. 
Reviewed-by: Florian Weimer Reviewed-by: Peter Bergner (cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727) diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h index f8cb343877..b36697ba44 100644 --- a/sysdeps/powerpc/dl-procinfo.h +++ b/sysdeps/powerpc/dl-procinfo.h @@ -38,7 +38,7 @@ #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + PPC_FEATURE_HAS_DFP) -#define _DL_PLATFORMS_COUNT 16 +#define _DL_PLATFORMS_COUNT 17 #define _DL_FIRST_PLATFORM 32 /* Mask to filter out platforms. */ @@ -62,6 +62,7 @@ #define PPC_PLATFORM_POWER8 13 #define PPC_PLATFORM_POWER9 14 #define PPC_PLATFORM_POWER10 15 +#define PPC_PLATFORM_POWER11 16 static inline const char * __attribute__ ((unused)) @@ -89,6 +90,11 @@ _dl_string_platform (const char *str) ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10; str++; } + else if (str[1] == '1') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11; + str++; + } else return -1; break; diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies new file mode 100644 index 0000000000..051cbe0f79 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power10/fpu +powerpc/powerpc32/power10 diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..58edb2861d --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power10/fpu/multiarch diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies new file mode 100644 index 0000000000..c70f0428ba --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies new file mode 100644 index 0000000000..de481d1c13 
--- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/be/power10/fpu +powerpc/powerpc64/be/power10 diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies new file mode 100644 index 0000000000..dff0e13064 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/fpu diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..c3f259e009 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/fpu/multiarch diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies new file mode 100644 index 0000000000..9491a394c9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/be/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies new file mode 100644 index 0000000000..e18182dcc1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/le/power10/fpu +powerpc/powerpc64/le/power10 diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies new file mode 100644 index 0000000000..e41bd55684 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/fpu diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies new file mode 100644 index 0000000000..c838d50931 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/fpu/multiarch diff --git 
a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies new file mode 100644 index 0000000000..687248c3c2 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/le/power10/multiarch diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c index 77465d9133..65d3e69303 100644 --- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c +++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c @@ -36,9 +36,11 @@ compute_level (void) return 9; if (strcmp (platform, "power10") == 0) return 10; + if (strcmp (platform, "power11") == 0) + return 11; printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform); - /* Assume that the new platform supports POWER10. */ - return 10; + /* Assume that the new platform supports POWER11. */ + return 11; } static int diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure index 4de94089a3..9e5a07ab6d 100644 --- a/sysdeps/powerpc/preconfigure +++ b/sysdeps/powerpc/preconfigure @@ -58,7 +58,7 @@ fi ;; - a2|970|power[4-9]|power5x|power6+|power10) + a2|970|power[4-9]|power5x|power6+|power10|power11) submachine=${archcpu} if test ${libc_cv_cc_submachine+y} then : diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac index 6c63bd8257..14b6dafd4a 100644 --- a/sysdeps/powerpc/preconfigure.ac +++ b/sysdeps/powerpc/preconfigure.ac @@ -46,7 +46,7 @@ case "${machine}:${submachine}" in AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") ;; - a2|970|power[[4-9]]|power5x|power6+|power10) + a2|970|power[[4-9]]|power5x|power6+|power10|power11) submachine=${archcpu} AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") ;; commit 983f34a1252de3ca6f2305c211d86530ea42010e Author: caiyinyu Date: Mon Mar 11 16:07:48 2024 +0800 LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf diff --git a/sysdeps/loongarch/fpu/e_scalbf.c 
b/sysdeps/loongarch/fpu/e_scalbf.c index 9f05485236..7c0395fbb5 100644 --- a/sysdeps/loongarch/fpu/e_scalbf.c +++ b/sysdeps/loongarch/fpu/e_scalbf.c @@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn) return x; } -libm_alias_finite (__ieee754_scalb, __scalb) +libm_alias_finite (__ieee754_scalbf, __scalbf) commit 7fc8242bf87828c935ac5df5cafb9dc7ab635fd9 Author: H.J. Lu Date: Fri Feb 16 07:17:10 2024 -0800 x86-64: Save APX registers in ld.so trampoline Add APX registers to STATE_SAVE_MASK so that APX registers are saved in ld.so trampoline. This fixes BZ #31371. Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will be used by i386 _dl_tlsdesc_dynamic. Reviewed-by: Noah Goldstein (cherry picked from commit dfb05f8e704edac70db38c4c8ee700769d91a413) diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 85d0a8c943..837fd28734 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -21,14 +21,54 @@ #include +/* The extended state feature IDs in the state component bitmap. */ +#define X86_XSTATE_X87_ID 0 +#define X86_XSTATE_SSE_ID 1 +#define X86_XSTATE_AVX_ID 2 +#define X86_XSTATE_BNDREGS_ID 3 +#define X86_XSTATE_BNDCFG_ID 4 +#define X86_XSTATE_K_ID 5 +#define X86_XSTATE_ZMM_H_ID 6 +#define X86_XSTATE_ZMM_ID 7 +#define X86_XSTATE_PKRU_ID 9 +#define X86_XSTATE_TILECFG_ID 17 +#define X86_XSTATE_TILEDATA_ID 18 +#define X86_XSTATE_APX_F_ID 19 + +#ifdef __x86_64__ /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be - aligned to 16 bytes for fxsave and 64 bytes for xsave. */ -#define STATE_SAVE_OFFSET (8 * 7 + 8) - -/* Save SSE, AVX, AVX512, mask and bound registers. */ -#define STATE_SAVE_MASK \ - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) + aligned to 16 bytes for fxsave and 64 bytes for xsave. + + NB: It is non-zero because of the 128-byte red-zone. Some registers + are saved on stack without adjusting stack pointer first. 
When we + update stack pointer to allocate more space, we need to take the + red-zone into account. */ +# define STATE_SAVE_OFFSET (8 * 7 + 8) + +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX + registers are mutually exclusive. */ +# define STATE_SAVE_MASK \ + ((1 << X86_XSTATE_SSE_ID) \ + | (1 << X86_XSTATE_AVX_ID) \ + | (1 << X86_XSTATE_BNDREGS_ID) \ + | (1 << X86_XSTATE_K_ID) \ + | (1 << X86_XSTATE_ZMM_H_ID) \ + | (1 << X86_XSTATE_ZMM_ID) \ + | (1 << X86_XSTATE_APX_F_ID)) +#else +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 + doesn't have red-zone, use 0 here. */ +# define STATE_SAVE_OFFSET 0 + +/* Save SSE, AVX, AVX512, mask and bound registers. */ +# define STATE_SAVE_MASK \ + ((1 << X86_XSTATE_SSE_ID) \ + | (1 << X86_XSTATE_AVX_ID) \ + | (1 << X86_XSTATE_BNDREGS_ID) \ + | (1 << X86_XSTATE_K_ID) \ + | (1 << X86_XSTATE_ZMM_H_ID)) +#endif /* Constants for bits in __x86_string_control: */ commit a364304718725a31ab141936322855c76c73e35e Author: H.J. Lu Date: Mon Feb 26 06:37:03 2024 -0800 x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers Compiler generates the following instruction sequence for GNU2 dynamic TLS access: leaq tls_var@TLSDESC(%rip), %rax call *tls_var@TLSCALL(%rax) or leal tls_var@TLSDESC(%ebx), %eax call *tls_var@TLSCALL(%eax) CALL instruction is transparent to compiler which assumes all registers, except for EFLAGS and RAX/EAX, are unchanged after CALL. When _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow path. __tls_get_addr is a normal function which doesn't preserve any caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer caller-saved registers, but didn't preserve any other caller-saved registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all caller-saved registers. This fixes BZ #31372. 
Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic) to optimize elf_machine_runtime_setup. Reviewed-by: Noah Goldstein (cherry picked from commit 0aac205a814a8511e98d02b91a8dc908f1c53cde) diff --git a/elf/Makefile b/elf/Makefile index 5d78b659ce..c5c37a9147 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -424,6 +424,7 @@ tests += \ tst-glibc-hwcaps-prepend \ tst-global1 \ tst-global2 \ + tst-gnu2-tls2 \ tst-initfinilazyfail \ tst-initorder \ tst-initorder2 \ @@ -846,6 +847,9 @@ modules-names += \ tst-filterobj-flt \ tst-finilazyfailmod \ tst-globalmod2 \ + tst-gnu2-tls2mod0 \ + tst-gnu2-tls2mod1 \ + tst-gnu2-tls2mod2 \ tst-initlazyfailmod \ tst-initorder2a \ tst-initorder2b \ @@ -3044,8 +3048,22 @@ $(objpfx)tst-tlsgap.out: \ $(objpfx)tst-tlsgap-mod0.so \ $(objpfx)tst-tlsgap-mod1.so \ $(objpfx)tst-tlsgap-mod2.so + +$(objpfx)tst-gnu2-tls2: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so + ifeq (yes,$(have-mtls-dialect-gnu2)) +# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved +# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372 +test-xfail-tst-gnu2-tls2 = yes + CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2 endif diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c new file mode 100644 index 0000000000..7ac04d7f33 --- /dev/null +++ b/elf/tst-gnu2-tls2.c @@ -0,0 +1,122 @@ +/* Test TLSDESC relocation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tst-gnu2-tls2.h" + +#ifndef IS_SUPPORTED +# define IS_SUPPORTED() true +#endif + +/* An architecture can define it to clobber caller-saved registers in + malloc below to verify that the implicit TLSDESC call won't change + caller-saved registers. */ +#ifndef PREPARE_MALLOC +# define PREPARE_MALLOC() +#endif + +extern void * __libc_malloc (size_t); + +size_t malloc_counter = 0; + +void * +malloc (size_t n) +{ + PREPARE_MALLOC (); + malloc_counter++; + return __libc_malloc (n); +} + +static void *mod[3]; +#ifndef MOD +# define MOD(i) "tst-gnu2-tls2mod" #i ".so" +#endif +static const char *modname[3] = { MOD(0), MOD(1), MOD(2) }; +#undef MOD + +static void +open_mod (int i) +{ + mod[i] = xdlopen (modname[i], RTLD_LAZY); + printf ("open %s\n", modname[i]); +} + +static void +close_mod (int i) +{ + xdlclose (mod[i]); + mod[i] = NULL; + printf ("close %s\n", modname[i]); +} + +static void +access_mod (int i, const char *sym) +{ + struct tls var = { -1, -1, -1, -1 }; + struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym); + /* Check that our malloc is called. 
*/ + malloc_counter = 0; + struct tls *p = f (&var); + TEST_VERIFY (malloc_counter != 0); + printf ("access %s: %s() = %p\n", modname[i], sym, p); + TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); + ++(p->a); +} + +static void * +start (void *arg) +{ + /* The DTV generation is at the last dlopen of mod0 and the + entry for mod1 is NULL. */ + + open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS. */ + + /* Force the slow path in GNU2 TLS descriptor call. */ + access_mod (1, "apply_tls"); + + return arg; +} + +static int +do_test (void) +{ + if (!IS_SUPPORTED ()) + return EXIT_UNSUPPORTED; + + open_mod (0); + open_mod (1); + open_mod (2); + close_mod (0); + close_mod (1); /* Create modid gap at mod1. */ + open_mod (0); /* Reuse modid of mod0, bump generation count. */ + + /* Create a thread where DTV of mod1 is NULL. */ + pthread_t t = xpthread_create (NULL, start, NULL); + xpthread_join (t); + return 0; +} + +#include diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h new file mode 100644 index 0000000000..77964a57a3 --- /dev/null +++ b/elf/tst-gnu2-tls2.h @@ -0,0 +1,36 @@ +/* Test TLSDESC relocation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include + +struct tls +{ + int64_t a, b, c, d; +}; + +extern struct tls *apply_tls (struct tls *); + +/* An architecture can define them to verify that clobber caller-saved + registers aren't changed by the implicit TLSDESC call. */ +#ifndef BEFORE_TLSDESC_CALL +# define BEFORE_TLSDESC_CALL() +#endif + +#ifndef AFTER_TLSDESC_CALL +# define AFTER_TLSDESC_CALL() +#endif diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c new file mode 100644 index 0000000000..45556a0e17 --- /dev/null +++ b/elf/tst-gnu2-tls2mod0.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var0 = *p; + struct tls *ret = &tls_var0; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c new file mode 100644 index 0000000000..e10b9dbc0a --- /dev/null +++ b/elf/tst-gnu2-tls2mod1.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var1[1] = *p; + struct tls *ret = &tls_var1[1]; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c new file mode 100644 index 0000000000..141af51e55 --- /dev/null +++ b/elf/tst-gnu2-tls2mod2.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var2 __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var2 = *p; + struct tls *ret = &tls_var2; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index fc1ef96587..50d74fe6e9 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n", { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + (ElfW(Word))td->arg); - td->entry = _dl_tlsdesc_dynamic; + td->entry = GLRO(dl_x86_tlsdesc_dynamic); } else # endif diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..3627028577 --- /dev/null +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h @@ -0,0 +1,190 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2004-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#undef REGISTER_SAVE_AREA + +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# ifdef USE_FNSAVE +# error USE_FNSAVE shouldn't be defined +# endif +# ifdef USE_FXSAVE +/* Use fxsave to save all registers. */ +# define REGISTER_SAVE_AREA 512 +# endif +#else +# ifdef USE_FNSAVE +/* Use fnsave to save x87 FPU stack registers. */ +# define REGISTER_SAVE_AREA 108 +# else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save all registers. Add 12 bytes to align the stack + to 16 bytes. */ +# define REGISTER_SAVE_AREA (512 + 12) +# endif +#endif + + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function + + /* This function is used for symbols that need dynamic TLS. + + %eax points to the TLS descriptor, such that 0(%eax) points to + _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct + tlsdesc_dynamic_arg object. It must return in %eax the offset + between the thread pointer and the object denoted by the + argument, without clobbering any registers. + + The assembly code that follows is a rendition of the following + C code, hand-optimized a little bit. + +ptrdiff_t +__attribute__ ((__regparm__ (1))) +_dl_tlsdesc_dynamic (struct tlsdesc *tdp) +{ + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; +} +*/ + cfi_startproc + .align 16 +_dl_tlsdesc_dynamic: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. 
*/ + subl $32, %esp + cfi_adjust_cfa_offset (32) + movl %ecx, 20(%esp) + movl %edx, 24(%esp) + movl TLSDESC_ARG(%eax), %eax + movl %gs:DTV_OFFSET, %edx + movl TLSDESC_GEN_COUNT(%eax), %ecx + cmpl (%edx), %ecx + ja 2f + movl TLSDESC_MODID(%eax), %ecx + movl (%edx,%ecx,8), %edx + cmpl $-1, %edx + je 2f + movl TLSDESC_MODOFF(%eax), %eax + addl %edx, %eax +1: + movl 20(%esp), %ecx + subl %gs:0, %eax + movl 24(%esp), %edx + addl $32, %esp + cfi_adjust_cfa_offset (-32) + ret + .p2align 4,,7 +2: + cfi_adjust_cfa_offset (32) +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movl %ebx, -28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + and $-STATE_SAVE_ALIGNMENT, %esp +#endif +#ifdef REGISTER_SAVE_AREA + subl $REGISTER_SAVE_AREA, %esp +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +# endif + /* Allocate stack space of the required size to save the state. */ + LOAD_PIC_REG (cx) + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +#endif +#ifdef USE_FNSAVE + fnsave (%esp) +#elif defined USE_FXSAVE + fxsave (%esp) +#else + /* Save the argument for ___tls_get_addr in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. 
*/ +# ifdef USE_XSAVE + movl %edx, (512)(%esp) + movl %edx, (512 + 4 * 1)(%esp) + movl %edx, (512 + 4 * 2)(%esp) + movl %edx, (512 + 4 * 3)(%esp) +# endif + movl %edx, (512 + 4 * 4)(%esp) + movl %edx, (512 + 4 * 5)(%esp) + movl %edx, (512 + 4 * 6)(%esp) + movl %edx, (512 + 4 * 7)(%esp) + movl %edx, (512 + 4 * 8)(%esp) + movl %edx, (512 + 4 * 9)(%esp) + movl %edx, (512 + 4 * 10)(%esp) + movl %edx, (512 + 4 * 11)(%esp) + movl %edx, (512 + 4 * 12)(%esp) + movl %edx, (512 + 4 * 13)(%esp) + movl %edx, (512 + 4 * 14)(%esp) + movl %edx, (512 + 4 * 15)(%esp) +# ifdef USE_XSAVE + xsave (%esp) +# else + xsavec (%esp) +# endif + /* Restore the argument for ___tls_get_addr in EAX. */ + movl %ecx, %eax +#endif + call HIDDEN_JUMPTARGET (___tls_get_addr) + /* Get register content back. */ +#ifdef USE_FNSAVE + frstor (%esp) +#elif defined USE_FXSAVE + fxrstor (%esp) +#else + /* Save and restore ___tls_get_addr return value stored in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor (%esp) + movl %ecx, %eax +#endif +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl -28(%esp), %ebx + cfi_restore(%ebx) +#else + addl $REGISTER_SAVE_AREA, %esp + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index 90d93caa0c..f002feee56 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S @@ -18,8 +18,27 @@ #include #include +#include +#include #include "tlsdesc.h" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 4-byte stack alignment. Although + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume + that stack will be always aligned at 16 bytes.
*/ +# define DL_STACK_ALIGNMENT 4 +#endif + +/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align + stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + .text /* This function is used to compute the TP offset for symbols in @@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak: .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak #ifdef SHARED - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,@function - - /* This function is used for symbols that need dynamic TLS. - - %eax points to the TLS descriptor, such that 0(%eax) points to - _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct - tlsdesc_dynamic_arg object. It must return in %eax the offset - between the thread pointer and the object denoted by the - argument, without clobbering any registers. - - The assembly code that follows is a rendition of the following - C code, hand-optimized a little bit. - -ptrdiff_t -__attribute__ ((__regparm__ (1))) -_dl_tlsdesc_dynamic (struct tlsdesc *tdp) -{ - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); - if (__builtin_expect (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; -} -*/ - cfi_startproc - .align 16 -_dl_tlsdesc_dynamic: - /* Like all TLS resolvers, preserve call-clobbered registers. - We need two scratch regs anyway. 
*/ - subl $28, %esp - cfi_adjust_cfa_offset (28) - movl %ecx, 20(%esp) - movl %edx, 24(%esp) - movl TLSDESC_ARG(%eax), %eax - movl %gs:DTV_OFFSET, %edx - movl TLSDESC_GEN_COUNT(%eax), %ecx - cmpl (%edx), %ecx - ja .Lslow - movl TLSDESC_MODID(%eax), %ecx - movl (%edx,%ecx,8), %edx - cmpl $-1, %edx - je .Lslow - movl TLSDESC_MODOFF(%eax), %eax - addl %edx, %eax -.Lret: - movl 20(%esp), %ecx - subl %gs:0, %eax - movl 24(%esp), %edx - addl $28, %esp - cfi_adjust_cfa_offset (-28) - ret - .p2align 4,,7 -.Lslow: - cfi_adjust_cfa_offset (28) - call HIDDEN_JUMPTARGET (___tls_get_addr) - jmp .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +# define USE_FNSAVE +# define MINIMUM_ALIGNMENT 4 +# define STATE_SAVE_ALIGNMENT 4 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fnsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef MINIMUM_ALIGNMENT +# undef USE_FNSAVE + +# define MINIMUM_ALIGNMENT 16 + +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVEC #endif /* SHARED */ diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 4d50b327b5..992aabe43e 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -1,5 +1,5 @@ ifeq ($(subdir),csu) -gen-as-const-headers += cpu-features-offsets.sym +gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym endif ifeq ($(subdir),elf) @@ -86,6 +86,11 @@ endif tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F 
tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV) tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd) + +CFLAGS-tst-gnu2-tls2.c += -msse +CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell +CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell +CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 25e6622a79..835113b42f 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -27,8 +27,13 @@ extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; -#if defined SHARED && defined __x86_64__ -# include +#if defined SHARED +extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden; +extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden; +extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden; + +# ifdef __x86_64__ +# include static void TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp) @@ -47,6 +52,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp) : plt_rewrite_jmp); } } +# else +extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden; +# endif +#endif + +#ifdef __x86_64__ +extern void _dl_runtime_resolve_fxsave (void) attribute_hidden; +extern void _dl_runtime_resolve_xsave (void) attribute_hidden; +extern void _dl_runtime_resolve_xsavec (void) attribute_hidden; #endif #ifdef __LP64__ @@ -1130,6 +1144,44 @@ no_cpuid: TUNABLE_CALLBACK (set_x86_shstk)); #endif + if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + { +#ifdef __x86_64__ + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec; +#endif +#ifdef SHARED + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec; +#endif + } + else + { +#ifdef __x86_64__ + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave; +#endif +#ifdef SHARED + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave; +#endif + } + } + else + { +#ifdef __x86_64__ + 
GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; +# ifdef SHARED + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; +# endif +#else +# ifdef SHARED + if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; + else + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; +# endif +#endif + } + #ifdef SHARED # ifdef __x86_64__ TUNABLE_GET (plt_rewrite, tunable_val_t *, diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c index ee957b4d70..5920d4b320 100644 --- a/sysdeps/x86/dl-procinfo.c +++ b/sysdeps/x86/dl-procinfo.c @@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9] #else , #endif + +#if defined SHARED && !IS_IN (ldconfig) +# if !defined PROCINFO_DECL + ._dl_x86_tlsdesc_dynamic +# else +PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic +# endif +# ifndef PROCINFO_DECL += NULL +# endif +# ifdef PROCINFO_DECL +; +# else +, +# endif +#endif diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym similarity index 89% rename from sysdeps/x86_64/features-offsets.sym rename to sysdeps/x86/features-offsets.sym index 9e4be3393a..77e990c705 100644 --- a/sysdeps/x86_64/features-offsets.sym +++ b/sysdeps/x86/features-offsets.sym @@ -3,4 +3,6 @@ #include RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features) +#ifdef __x86_64__ RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1) +#endif diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 837fd28734..485cad9c02 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -70,6 +70,12 @@ | (1 << X86_XSTATE_ZMM_H_ID)) #endif +/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL. + Compiler assumes that all registers, including x87 FPU stack registers, + are unchanged after CALL, except for EFLAGS and RAX/EAX. 
*/ +#define TLSDESC_CALL_STATE_SAVE_MASK \ + (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) + /* Constants for bits in __x86_string_control: */ /* Avoid short distance REP MOVSB. */ diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c new file mode 100644 index 0000000000..de900a423b --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2.c @@ -0,0 +1,20 @@ +#ifndef __x86_64__ +#include + +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +#endif + +/* Clear XMM0...XMM7 */ +#define PREPARE_MALLOC() \ +{ \ + asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \ +} + +#include diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 90f4ecfd26..e8babc9a4e 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt endif ifeq ($(subdir),csu) -gen-as-const-headers += features-offsets.sym link-defines.sym +gen-as-const-headers += link-defines.sym endif ifeq ($(subdir),gmon) diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 6d605d0d32..ff5d45f7cb 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], int lazy, int profile) { Elf64_Addr *got; - extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_avx 
(ElfW(Word)) attribute_hidden; extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; @@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], /* Identify this shared object. */ *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l; - const struct cpu_features* cpu_features = __get_cpu_features (); - #ifdef SHARED /* The got[2] entry contains the address of a function which gets called to get the address of a so far unresolved function and @@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], end in this function. */ if (__glibc_unlikely (profile)) { + const struct cpu_features* cpu_features = __get_cpu_features (); if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)) *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512; else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX)) @@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], /* This function will get called to fix up the GOT entry indicated by the offset on the stack, and then jump to the resolved address. */ - if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL - || GLRO(dl_x86_cpu_features).xsave_state_size != 0) - *(ElfW(Addr) *) (got + 2) - = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC) - ? 
(ElfW(Addr)) &_dl_runtime_resolve_xsavec - : (ElfW(Addr)) &_dl_runtime_resolve_xsave); - else - *(ElfW(Addr) *) (got + 2) - = (ElfW(Addr)) &_dl_runtime_resolve_fxsave; + *(ElfW(Addr) *) (got + 2) + = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve); } } @@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n", { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + reloc->r_addend); - td->entry = _dl_tlsdesc_dynamic; + td->entry = GLRO(dl_x86_tlsdesc_dynamic); } else # endif diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c index 4d1d790fbb..06637a8154 100644 --- a/sysdeps/x86_64/dl-procinfo.c +++ b/sysdeps/x86_64/dl-procinfo.c @@ -41,5 +41,21 @@ #include +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_64_runtime_resolve +# else +PROCINFO_CLASS void * _dl_x86_64_runtime_resolve +# endif +# ifndef PROCINFO_DECL += NULL +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + #undef PROCINFO_DECL #undef PROCINFO_CLASS diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..0c2e8d5320 --- /dev/null +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h @@ -0,0 +1,166 @@ +/* Thread-local storage handling in the ELF dynamic linker. x86_64 version. + Copyright (C) 2004-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef SECTION +# define SECTION(p) p +#endif + +#undef REGISTER_SAVE_AREA +#undef LOCAL_STORAGE_AREA +#undef BASE + +#include "dl-trampoline-state.h" + + .section SECTION(.text),"ax",@progbits + + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function + + /* %rax points to the TLS descriptor, such that 0(%rax) points to + _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct + tlsdesc_dynamic_arg object. It must return in %rax the offset + between the thread pointer and the object denoted by the + argument, without clobbering any registers. + + The assembly code that follows is a rendition of the following + C code, hand-optimized a little bit. + +ptrdiff_t +_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax")) +{ + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset + - __thread_pointer; + + return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer; +} +*/ + cfi_startproc + .align 16 +_dl_tlsdesc_dynamic: + _CET_ENDBR + /* Preserve call-clobbered registers that we modify. + We need two scratch regs anyway. 
*/ + movq %rsi, -16(%rsp) + mov %fs:DTV_OFFSET, %RSI_LP + movq %rdi, -8(%rsp) + movq TLSDESC_ARG(%rax), %rdi + movq (%rsi), %rax + cmpq %rax, TLSDESC_GEN_COUNT(%rdi) + ja 2f + movq TLSDESC_MODID(%rdi), %rax + salq $4, %rax + movq (%rax,%rsi), %rax + cmpq $-1, %rax + je 2f + addq TLSDESC_MODOFF(%rdi), %rax +1: + movq -16(%rsp), %rsi + sub %fs:0, %RAX_LP + movq -8(%rsp), %rdi + ret +2: +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movq %rbx, -24(%rsp) + mov %RSP_LP, %RBX_LP + cfi_def_cfa_register(%rbx) + and $-STATE_SAVE_ALIGNMENT, %RSP_LP +#endif +#ifdef REGISTER_SAVE_AREA +# if DL_RUNTIME_RESOLVE_REALIGN_STACK + /* STATE_SAVE_OFFSET has space for 8 integer registers. But we + need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus + RBX above. */ + sub $(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP +# else + sub $REGISTER_SAVE_AREA, %RSP_LP + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else + /* Allocate stack space of the required size to save the state. */ + sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP +#endif + /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, + r10 and r11. */ + movq %rcx, REGISTER_SAVE_RCX(%rsp) + movq %rdx, REGISTER_SAVE_RDX(%rsp) + movq %r8, REGISTER_SAVE_R8(%rsp) + movq %r9, REGISTER_SAVE_R9(%rsp) + movq %r10, REGISTER_SAVE_R10(%rsp) + movq %r11, REGISTER_SAVE_R11(%rsp) +#ifdef USE_FXSAVE + fxsave STATE_SAVE_OFFSET(%rsp) +#else + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. 
*/ +# ifdef USE_XSAVE + movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp) +# endif + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp) + movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp) +# ifdef USE_XSAVE + xsave STATE_SAVE_OFFSET(%rsp) +# else + xsavec STATE_SAVE_OFFSET(%rsp) +# endif +#endif + /* %rdi already points to the tlsinfo data structure. */ + call HIDDEN_JUMPTARGET (__tls_get_addr) + # Get register content back. +#ifdef USE_FXSAVE + fxrstor STATE_SAVE_OFFSET(%rsp) +#else + /* Save and restore __tls_get_addr return value stored in RAX. */ + mov %RAX_LP, %RCX_LP + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor STATE_SAVE_OFFSET(%rsp) + mov %RCX_LP, %RAX_LP +#endif + movq REGISTER_SAVE_R11(%rsp), %r11 + movq REGISTER_SAVE_R10(%rsp), %r10 + movq REGISTER_SAVE_R9(%rsp), %r9 + movq REGISTER_SAVE_R8(%rsp), %r8 + movq REGISTER_SAVE_RDX(%rsp), %rdx + movq REGISTER_SAVE_RCX(%rsp), %rcx +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %RBX_LP, %RSP_LP + cfi_def_cfa_register(%rsp) + movq -24(%rsp), %rbx + cfi_restore(%rbx) +#else + add $REGISTER_SAVE_AREA, %RSP_LP + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index f748af2ece..ea69f5223a 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -18,7 +18,19 @@ #include #include +#include +#include #include "tlsdesc.h" +#include "dl-trampoline-save.h" + +/* Area on stack to save and restore registers used for parameter + passing when calling _dl_tlsdesc_dynamic.
*/ +#define REGISTER_SAVE_RCX 0 +#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) +#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDX + 8) +#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) +#define REGISTER_SAVE_R10 (REGISTER_SAVE_R9 + 8) +#define REGISTER_SAVE_R11 (REGISTER_SAVE_R10 + 8) .text @@ -67,80 +79,24 @@ _dl_tlsdesc_undefweak: .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak #ifdef SHARED - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,@function - - /* %rax points to the TLS descriptor, such that 0(%rax) points to - _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct - tlsdesc_dynamic_arg object. It must return in %rax the offset - between the thread pointer and the object denoted by the - argument, without clobbering any registers. - - The assembly code that follows is a rendition of the following - C code, hand-optimized a little bit. - -ptrdiff_t -_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax")) -{ - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); - if (__builtin_expect (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset - - __thread_pointer; - - return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer; -} -*/ - cfi_startproc - .align 16 -_dl_tlsdesc_dynamic: - _CET_ENDBR - /* Preserve call-clobbered registers that we modify. - We need two scratch regs anyway. 
*/ - movq %rsi, -16(%rsp) - mov %fs:DTV_OFFSET, %RSI_LP - movq %rdi, -8(%rsp) - movq TLSDESC_ARG(%rax), %rdi - movq (%rsi), %rax - cmpq %rax, TLSDESC_GEN_COUNT(%rdi) - ja .Lslow - movq TLSDESC_MODID(%rdi), %rax - salq $4, %rax - movq (%rax,%rsi), %rax - cmpq $-1, %rax - je .Lslow - addq TLSDESC_MODOFF(%rdi), %rax -.Lret: - movq -16(%rsp), %rsi - sub %fs:0, %RAX_LP - movq -8(%rsp), %rdi - ret -.Lslow: - /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9, - r10 and r11. Also, align the stack, that's off by 8 bytes. */ - subq $72, %rsp - cfi_adjust_cfa_offset (72) - movq %rdx, 8(%rsp) - movq %rcx, 16(%rsp) - movq %r8, 24(%rsp) - movq %r9, 32(%rsp) - movq %r10, 40(%rsp) - movq %r11, 48(%rsp) - /* %rdi already points to the tlsinfo data structure. */ - call HIDDEN_JUMPTARGET (__tls_get_addr) - movq 8(%rsp), %rdx - movq 16(%rsp), %rcx - movq 24(%rsp), %r8 - movq 32(%rsp), %r9 - movq 40(%rsp), %r10 - movq 48(%rsp), %r11 - addq $72, %rsp - cfi_adjust_cfa_offset (-72) - jmp .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_XSAVEC #endif /* SHARED */ diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h new file mode 100644 index 0000000000..84eac4a8ac --- /dev/null +++ b/sysdeps/x86_64/dl-trampoline-save.h @@ -0,0 +1,34 @@ +/* x86-64 PLT trampoline register save macros. + Copyright (C) 2024 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte stack alignment. Although + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume + that stack will be always aligned at 16 bytes. */ +# define DL_STACK_ALIGNMENT 8 +#endif + +/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align + stack to 16 bytes before calling _dl_fixup. */ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || 16 > DL_STACK_ALIGNMENT) diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h new file mode 100644 index 0000000000..575f120797 --- /dev/null +++ b/sysdeps/x86_64/dl-trampoline-state.h @@ -0,0 +1,51 @@ +/* x86-64 PLT dl-trampoline state macros. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 +# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +/* Local stack area before jumping to function address: RBX. */ +# define LOCAL_STORAGE_AREA 8 +# define BASE rbx +# ifdef USE_FXSAVE +/* Use fxsave to save XMM registers. */ +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) +# if (REGISTER_SAVE_AREA % 16) != 0 +# error REGISTER_SAVE_AREA must be multiple of 16 +# endif +# endif +#else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save XMM registers. */ +# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) +/* Local stack area before jumping to function address: All saved + registers. */ +# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA +# define BASE rsp +# if (REGISTER_SAVE_AREA % 16) != 8 +# error REGISTER_SAVE_AREA must be odd multiple of 8 +# endif +#endif diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index b2e7e0f69b..87c5137837 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -22,25 +22,7 @@ #include #include #include - -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: - - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 - - __tls_get_addr may be called with 8-byte stack alignment. Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. 
We use unaligned - 16-byte move to load and store SSE registers, which has no penalty - on modern processors if stack is 16-byte aligned. */ -# define DL_STACK_ALIGNMENT 8 -#endif - -/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align - stack to 16 bytes before calling _dl_fixup. */ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || 16 > DL_STACK_ALIGNMENT) +#include "dl-trampoline-save.h" /* Area on stack to save and restore registers used for parameter passing when calling _dl_fixup. */ diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index f55c6ea040..d9ccfb40d4 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -27,39 +27,7 @@ # undef LOCAL_STORAGE_AREA # undef BASE -# if (STATE_SAVE_ALIGNMENT % 16) != 0 -# error STATE_SAVE_ALIGNMENT must be multiple of 16 -# endif - -# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 -# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT -# endif - -# if DL_RUNTIME_RESOLVE_REALIGN_STACK -/* Local stack area before jumping to function address: RBX. */ -# define LOCAL_STORAGE_AREA 8 -# define BASE rbx -# ifdef USE_FXSAVE -/* Use fxsave to save XMM registers. */ -# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) -# if (REGISTER_SAVE_AREA % 16) != 0 -# error REGISTER_SAVE_AREA must be multiple of 16 -# endif -# endif -# else -# ifndef USE_FXSAVE -# error USE_FXSAVE must be defined -# endif -/* Use fxsave to save XMM registers. */ -# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) -/* Local stack area before jumping to function address: All saved - registers. 
*/ -# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA -# define BASE rsp -# if (REGISTER_SAVE_AREA % 16) != 8 -# error REGISTER_SAVE_AREA must be odd multiple of 8 -# endif -# endif +# include "dl-trampoline-state.h" .globl _dl_runtime_resolve .hidden _dl_runtime_resolve commit 853e915fdd6ae6c5f1a7a68d2594ec8dbfef1286 Author: H.J. Lu Date: Wed Feb 28 12:08:03 2024 -0800 x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers _dl_tlsdesc_dynamic should also preserve AMX registers which are caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size and save it in xsave_state_full_size which is only used by _dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes the AMX part of BZ #31372. Tested on AMX processor. AMX test is enabled only for compilers with the fix for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098 GCC 14 and GCC 11/12/13 branches have the bug fix. Reviewed-by: Sunil K Pandey (cherry picked from commit 9b7091415af47082664717210ac49d51551456ab) diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile index 4223feb95f..9a1e7aa646 100644 --- a/sysdeps/unix/sysv/linux/x86_64/Makefile +++ b/sysdeps/unix/sysv/linux/x86_64/Makefile @@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so cp $< $@ endif + +ifeq (yes,$(have-mamx-tile)) +tests += \ + tst-gnu2-tls2-amx \ +# tests + +modules-names += \ + tst-gnu2-tls2-amx-mod0 \ + tst-gnu2-tls2-amx-mod1 \ + tst-gnu2-tls2-amx-mod2 \ +# modules-names + +$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-amx.out: \ + $(objpfx)tst-gnu2-tls2-amx-mod0.so \ + $(objpfx)tst-gnu2-tls2-amx-mod1.so \ + $(objpfx)tst-gnu2-tls2-amx-mod2.so +$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport) +$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport) 
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport) + +CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile +CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2 +endif + endif # $(subdir) == elf ifneq ($(enable-cet),no) diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h index 2f511321ad..ef4631bf4b 100644 --- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h +++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h @@ -20,3 +20,8 @@ # define ARCH_SHSTK_SHSTK 0x1 # define ARCH_SHSTK_WRSS 0x2 #endif + +#ifndef ARCH_GET_XCOMP_PERM +# define ARCH_GET_XCOMP_PERM 0x1022 +# define ARCH_REQ_XCOMP_PERM 0x1023 +#endif diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c new file mode 100644 index 0000000000..2e0c7b91b7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c @@ -0,0 +1,2 @@ +#include "tst-gnu2-tls2-amx.h" +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c new file mode 100644 index 0000000000..b8a8ccf1c1 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c @@ -0,0 +1,2 @@ +#include "tst-gnu2-tls2-amx.h" +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c new file mode 100644 index 0000000000..cdf4a8f363 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c @@ -0,0 +1,2 @@ +#include "tst-gnu2-tls2-amx.h" +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c new file mode 100644 index 0000000000..ae4dd82556 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c @@ -0,0 
+1,83 @@ +/* Test TLSDESC relocation with AMX. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include "tst-gnu2-tls2-amx.h" + +extern int arch_prctl (int, ...); + +#define X86_XSTATE_TILECFG_ID 17 +#define X86_XSTATE_TILEDATA_ID 18 + +/* Initialize tile config. */ +__attribute__ ((noinline, noclone)) +static void +init_tile_config (__tilecfg *tileinfo) +{ + int i; + tileinfo->palette_id = 1; + tileinfo->start_row = 0; + + tileinfo->colsb[0] = MAX_ROWS; + tileinfo->rows[0] = MAX_ROWS; + + for (i = 1; i < 4; ++i) + { + tileinfo->colsb[i] = MAX_COLS; + tileinfo->rows[i] = MAX_ROWS; + } + + _tile_loadconfig (tileinfo); +} + +static bool +enable_amx (void) +{ + uint64_t bitmask; + if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0) + return false; + + if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0) + return false; + + if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0) + return false; + + /* Load tile configuration. */ + __tilecfg tile_data = { 0 }; + init_tile_config (&tile_data); + + return true; +} + +/* An architecture can define it to clobber caller-saved registers in + malloc below to verify that the implicit TLSDESC call won't change + caller-saved registers. 
*/ +static void +clear_tile_register (void) +{ + _tile_zero (2); +} + +#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so" +#define IS_SUPPORTED() enable_amx () +#define PREPARE_MALLOC() clear_tile_register () + +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h new file mode 100644 index 0000000000..1845a3caba --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h @@ -0,0 +1,63 @@ +/* Test TLSDESC relocation with AMX. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +#define MAX_ROWS 16 +#define MAX_COLS 64 +#define MAX 1024 +#define STRIDE 64 + +typedef struct __tile_config +{ + uint8_t palette_id; + uint8_t start_row; + uint8_t reserved_0[14]; + uint16_t colsb[16]; + uint8_t rows[16]; +} __tilecfg __attribute__ ((aligned (64))); + +/* Initialize int8_t buffer */ +static inline void +init_buffer (int8_t *buf, int8_t value) +{ + int rows, colsb, i, j; + rows = MAX_ROWS; + colsb = MAX_COLS; + + for (i = 0; i < rows; i++) + for (j = 0; j < colsb; j++) + buf[i * colsb + j] = value; +} + +#define BEFORE_TLSDESC_CALL() \ + int8_t src[MAX]; \ + int8_t res[MAX]; \ + /* Initialize src with data */ \ + init_buffer (src, 2); \ + /* Load tile rows from memory. 
*/ \ + _tile_loadd (2, src, STRIDE); + +#define AFTER_TLSDESC_CALL() \ + /* Store the tile data to memory. */ \ + _tile_stored (2, res, STRIDE); \ + _tile_release (); \ + TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0); diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index 6a8fd29813..21fc88d651 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -3,3 +3,4 @@ #include XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) +XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 835113b42f..d71e8d3d2e 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features) __cpuid_count (0xd, 0, eax, ebx, ecx, edx); if (ebx != 0) { + /* NB: On AMX capable processors, ebx always includes AMX + states. */ unsigned int xsave_state_full_size = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); @@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features) { unsigned int xstate_comp_offsets[32]; unsigned int xstate_comp_sizes[32]; +#ifdef __x86_64__ + unsigned int xstate_amx_comp_offsets[32]; + unsigned int xstate_amx_comp_sizes[32]; + unsigned int amx_ecx; +#endif unsigned int i; xstate_comp_offsets[0] = 0; @@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features) xstate_comp_offsets[2] = 576; xstate_comp_sizes[0] = 160; xstate_comp_sizes[1] = 256; +#ifdef __x86_64__ + xstate_amx_comp_offsets[0] = 0; + xstate_amx_comp_offsets[1] = 160; + xstate_amx_comp_offsets[2] = 576; + xstate_amx_comp_sizes[0] = 160; + xstate_amx_comp_sizes[1] = 256; +#endif for (i = 2; i < 32; i++) { - if ((STATE_SAVE_MASK & (1 << i)) != 0) + if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) { __cpuid_count (0xd, i, eax, ebx, ecx, edx); - xstate_comp_sizes[i] = eax; +#ifdef __x86_64__ + /* Include this in 
xsave_state_full_size. */ + amx_ecx = ecx; + xstate_amx_comp_sizes[i] = eax; + if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) + { + /* Exclude this from xsave_state_size. */ + ecx = 0; + xstate_comp_sizes[i] = 0; + } + else +#endif + xstate_comp_sizes[i] = eax; } else { +#ifdef __x86_64__ + amx_ecx = 0; + xstate_amx_comp_sizes[i] = 0; +#endif ecx = 0; xstate_comp_sizes[i] = 0; } @@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features) if ((ecx & (1 << 1)) != 0) xstate_comp_offsets[i] = ALIGN_UP (xstate_comp_offsets[i], 64); +#ifdef __x86_64__ + xstate_amx_comp_offsets[i] + = (xstate_amx_comp_offsets[i - 1] + + xstate_amx_comp_sizes[i - 1]); + if ((amx_ecx & (1 << 1)) != 0) + xstate_amx_comp_offsets[i] + = ALIGN_UP (xstate_amx_comp_offsets[i], + 64); +#endif } } @@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features) = xstate_comp_offsets[31] + xstate_comp_sizes[31]; if (size) { +#ifdef __x86_64__ + unsigned int amx_size + = (xstate_amx_comp_offsets[31] + + xstate_amx_comp_sizes[31]); + amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET, + 64); + /* Set xsave_state_full_size to the compact AMX + state size for XSAVEC. NB: xsave_state_full_size + is only used in _dl_tlsdesc_dynamic_xsave and + _dl_tlsdesc_dynamic_xsavec. */ + cpu_features->xsave_state_full_size = amx_size; +#endif cpu_features->xsave_state_size = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); CPU_FEATURE_SET (cpu_features, XSAVEC); diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index b9bf3115b6..cd7bd27cf3 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -934,6 +934,8 @@ struct cpu_features /* The full state size for XSAVE when XSAVEC is disabled by GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC + + and the AMX state size when XSAVEC is available. 
*/ unsigned int xsave_state_full_size; /* Data cache size for use in memory and string routines, typically diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 485cad9c02..db8e576e91 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -56,6 +56,14 @@ | (1 << X86_XSTATE_ZMM_H_ID) \ | (1 << X86_XSTATE_ZMM_ID) \ | (1 << X86_XSTATE_APX_F_ID)) + +/* AMX state mask. */ +# define AMX_STATE_SAVE_MASK \ + ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) + +/* States to be included in xsave_state_full_size. */ +# define FULL_STATE_SAVE_MASK \ + (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) #else /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 doesn't have red-zone, use 0 here. */ @@ -68,13 +76,17 @@ | (1 << X86_XSTATE_BNDREGS_ID) \ | (1 << X86_XSTATE_K_ID) \ | (1 << X86_XSTATE_ZMM_H_ID)) + +/* States to be included in xsave_state_size. */ +# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK #endif /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL. - Compiler assumes that all registers, including x87 FPU stack registers, - are unchanged after CALL, except for EFLAGS and RAX/EAX. */ + Compiler assumes that all registers, including AMX and x87 FPU + stack registers, are unchanged after CALL, except for EFLAGS and + RAX/EAX. */ #define TLSDESC_CALL_STATE_SAVE_MASK \ - (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) + (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) /* Constants for bits in __x86_string_control: */ diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 418cc4a9b8..04a534fa12 100755 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -134,6 +134,34 @@ fi config_vars="$config_vars enable-cet = $enable_cet" +# Check if -mamx-tile works properly. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5 +printf %s "checking whether -mamx-tile works properly... 
" >&6; } +if test ${libc_cv_x86_have_amx_tile+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.c < +EOF + libc_cv_x86_have_amx_tile=no + if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep -q __builtin_ia32_ldtilecfg conftest.i; then + libc_cv_x86_have_amx_tile=yes + fi + fi + rm -rf conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5 +printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; } +config_vars="$config_vars +have-mamx-tile = $libc_cv_x86_have_amx_tile" + test -n "$critic_missing" && as_fn_error $? " *** $critic_missing" "$LINENO" 5 diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index d1f803c02e..c714c47351 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then fi LIBC_CONFIG_VAR([enable-cet], [$enable_cet]) +# Check if -mamx-tile works properly. +AC_CACHE_CHECK(whether -mamx-tile works properly, + libc_cv_x86_have_amx_tile, [dnl +cat > conftest.c < +EOF + libc_cv_x86_have_amx_tile=no + if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then + if grep -q __builtin_ia32_ldtilecfg conftest.i; then + libc_cv_x86_have_amx_tile=yes + fi + fi + rm -rf conftest*]) +LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile]) + test -n "$critic_missing" && AC_MSG_ERROR([ *** $critic_missing]) diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h index 0c2e8d5320..9f02cfc3eb 100644 --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h @@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic: # endif #else /* Allocate stack space of the required size to save the state. 
*/ - sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP + sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP #endif /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, r10 and r11. */ commit 354cabcb2634abe16da7a2ba5e648aac1204b58e Author: H.J. Lu Date: Mon Mar 18 06:40:16 2024 -0700 x86-64: Allocate state buffer space for RDI, RSI and RBX _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to STATE_SAVE_OFFSET(%rsp). +==================+<- stack frame start aligned at 8 or 16 bytes | |<- RDI saved in the red zone | |<- RSI saved in the red zone | |<- RBX saved in the red zone | |<- paddings for stack realignment of 64 bytes |------------------|<- xsave buffer end aligned at 64 bytes | |<- | |<- | |<- |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) | |<- 8-byte padding for 64-byte alignment | |<- 8-byte padding for 64-byte alignment | |<- R11 | |<- R10 | |<- R9 | |<- R8 | |<- RDX | |<- RCX +==================+<- RSP aligned at 64 bytes Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto stack without adjusting stack pointer first, using the red-zone. This fixes BZ #31501. Reviewed-by: Sunil K Pandey (cherry picked from commit 717ebfa85c8240d32d0d19d86a484c31c55c9617) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index d71e8d3d2e..6fe1b728c6 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -310,7 +310,7 @@ update_active (struct cpu_features *cpu_features) /* NB: On AMX capable processors, ebx always includes AMX states. 
*/ unsigned int xsave_state_full_size - = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64); cpu_features->xsave_state_size = xsave_state_full_size; @@ -400,8 +400,10 @@ update_active (struct cpu_features *cpu_features) unsigned int amx_size = (xstate_amx_comp_offsets[31] + xstate_amx_comp_sizes[31]); - amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET, - 64); + amx_size + = ALIGN_UP ((amx_size + + TLSDESC_CALL_REGISTER_SAVE_AREA), + 64); /* Set xsave_state_full_size to the compact AMX state size for XSAVEC. NB: xsave_state_full_size is only used in _dl_tlsdesc_dynamic_xsave and @@ -409,7 +411,8 @@ update_active (struct cpu_features *cpu_features) cpu_features->xsave_state_full_size = amx_size; #endif cpu_features->xsave_state_size - = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, + 64); CPU_FEATURE_SET (cpu_features, XSAVEC); } } diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index db8e576e91..7359149e17 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -38,14 +38,59 @@ #ifdef __x86_64__ /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be - aligned to 16 bytes for fxsave and 64 bytes for xsave. - - NB: Is is non-zero because of the 128-byte red-zone. Some registers - are saved on stack without adjusting stack pointer first. When we - update stack pointer to allocate more space, we need to take the - red-zone into account. */ + aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero + because MOV, instead of PUSH, is used to save registers onto stack. 
+ + +==================+<- stack frame start aligned at 8 or 16 bytes + | |<- paddings for stack realignment of 64 bytes + |------------------|<- xsave buffer end aligned at 64 bytes + | |<- + | |<- + | |<- + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) + | |<- 8-byte padding for 64-byte alignment + | |<- R9 + | |<- R8 + | |<- RDI + | |<- RSI + | |<- RDX + | |<- RCX + | |<- RAX + +==================+<- RSP aligned at 64 bytes + + */ # define STATE_SAVE_OFFSET (8 * 7 + 8) +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and + R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved + RDI, RSI and RBX values on stack by xsave. + + +==================+<- stack frame start aligned at 8 or 16 bytes + | |<- RDI saved in the red zone + | |<- RSI saved in the red zone + | |<- RBX saved in the red zone + | |<- paddings for stack realignment of 64 bytes + |------------------|<- xsave buffer end aligned at 64 bytes + | |<- + | |<- + | |<- + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) + | |<- 8-byte padding for 64-byte alignment + | |<- 8-byte padding for 64-byte alignment + | |<- R11 + | |<- R10 + | |<- R9 + | |<- R8 + | |<- RDX + | |<- RCX + +==================+<- RSP aligned at 64 bytes + + Define the total register save area size for all integer registers by + adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto + stack without adjusting stack pointer first, using the red-zone. */ +# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24) + /* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX registers are mutually exclusive. */ # define STATE_SAVE_MASK \ @@ -66,8 +111,9 @@ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) #else /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 - doesn't have red-zone, use 0 here. */ + uses PUSH to save registers onto stack, use 0 here. 
*/ # define STATE_SAVE_OFFSET 0 +# define TLSDESC_CALL_REGISTER_SAVE_AREA 0 /* Save SSE, AVX, AXV512, mask and bound registers. */ # define STATE_SAVE_MASK \ diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S new file mode 100644 index 0000000000..1d636669ba --- /dev/null +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S @@ -0,0 +1,87 @@ +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec + to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size + is 960 bytes and this test didn't fail. It may be due to the unused + last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and + this test might fail without the fix. */ +#ifndef OFFSET +# define OFFSET 40 +#endif + + .text + .p2align 4 + .globl apply_tls + .type apply_tls, @function +apply_tls: + cfi_startproc + _CET_ENDBR + pushq %rbp + cfi_def_cfa_offset (16) + cfi_offset (6, -16) + movdqu (%RDI_LP), %xmm0 + lea tls_var1@TLSDESC(%rip), %RAX_LP + mov %RSP_LP, %RBP_LP + cfi_def_cfa_register (6) + /* Align stack to 64 bytes. */ + and $-64, %RSP_LP + sub $OFFSET, %RSP_LP + pushq %rbx + /* Set %ebx to 0xbadbeef. 
*/ + movl $0xbadbeef, %ebx + movl $0xbadbeef, %esi + movq %rdi, saved_rdi(%rip) + movq %rsi, saved_rsi(%rip) + call *tls_var1@TLSCALL(%RAX_LP) + /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */ + cmpq saved_rdi(%rip), %rdi + jne L(hlt) + cmpq saved_rsi(%rip), %rsi + jne L(hlt) + cmpl $0xbadbeef, %ebx + jne L(hlt) + add %fs:0, %RAX_LP + movups %xmm0, 32(%RAX_LP) + movdqu 16(%RDI_LP), %xmm1 + mov %RAX_LP, %RBX_LP + movups %xmm1, 48(%RAX_LP) + lea 32(%RBX_LP), %RAX_LP + pop %rbx + leave + cfi_def_cfa (7, 8) + ret +L(hlt): + hlt + cfi_endproc + .size apply_tls, .-apply_tls + .hidden tls_var1 + .globl tls_var1 + .section .tbss,"awT",@nobits + .align 16 + .type tls_var1, @object + .size tls_var1, 3200 +tls_var1: + .zero 3200 + .local saved_rdi + .comm saved_rdi,8,8 + .local saved_rsi + .comm saved_rsi,8,8 + .section .note.GNU-stack,"",@progbits commit 15aebdbada54098787715448c94701f17033fc92 Author: Adhemerval Zanella Date: Tue Mar 12 13:21:18 2024 -0300 Ignore undefined symbols for -mtls-dialect=gnu2 So it does not fail for arm config that defaults to -mtp=soft (which issues a call to __aeabi_read_tp). Reviewed-by: H.J. Lu (cherry picked from commit 968b0ca9440040a2b31248a572891f0e55c1ab10) diff --git a/configure b/configure index 59ff1e415d..117b48a421 100755 --- a/configure +++ b/configure @@ -7020,7 +7020,7 @@ void foo (void) } EOF if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles - conftest.c -o conftest 1>&5' + -shared conftest.c -o conftest 1>&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 (eval $ac_try) 2>&5 ac_status=$? 
diff --git a/configure.ac b/configure.ac index 65799e5685..19b88a47a5 100644 --- a/configure.ac +++ b/configure.ac @@ -1297,7 +1297,7 @@ void foo (void) } EOF if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles - conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD]) + -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD]) then libc_cv_mtls_dialect_gnu2=yes else commit a8ba52bde58c69f2b31da62ad2311f119adf6cb9 Author: Adhemerval Zanella Date: Tue Mar 12 13:21:19 2024 -0300 arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372) ARM _dl_tlsdesc_dynamic slow path has two issues: * The ip/r12 is defined by AAPCS as a scratch register, and gcc is used to save the stack pointer before on some function calls. So it should also be saved/restored as well. It fixes the tst-gnu2-tls2. * None of the possible VFP registers are saved/restored. ARM has the additional complexity to have different VFP bank sizes (depending of VFP support by the chip). The tst-gnu2-tls2 test is extended to check for VFP registers, although only for hardfp builds. Different than setcontext, _dl_tlsdesc_dynamic does not have HWCAP_ARM_IWMMXT (I don't have a way to properly test it and it is almost a decade since newer hardware was released). With this patch there is no need to mark tst-gnu2-tls2 as XFAIL. Checked on arm-linux-gnueabihf. Reviewed-by: H.J. Lu (cherry picked from commit 64c7e344289ed085517c2227d8e3b06388242c13) diff --git a/config.h.in b/config.h.in index 44a34072a4..4d33c63a84 100644 --- a/config.h.in +++ b/config.h.in @@ -141,6 +141,9 @@ /* LOONGARCH floating-point ABI for ld.so. */ #undef LOONGARCH_ABI_FRLEN +/* Define whether ARM used hard-float and support VFPvX-D32. */ +#undef HAVE_ARM_PCS_VFP_D32 + /* Linux specific: minimum supported kernel version. 
*/ #undef __LINUX_KERNEL_VERSION diff --git a/elf/Makefile b/elf/Makefile index c5c37a9147..030db4d207 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -3056,10 +3056,6 @@ $(objpfx)tst-gnu2-tls2.out: \ $(objpfx)tst-gnu2-tls2mod2.so ifeq (yes,$(have-mtls-dialect-gnu2)) -# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved -# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372 -test-xfail-tst-gnu2-tls2 = yes - CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h index 77964a57a3..1ade8151e2 100644 --- a/elf/tst-gnu2-tls2.h +++ b/elf/tst-gnu2-tls2.h @@ -27,6 +27,10 @@ extern struct tls *apply_tls (struct tls *); /* An architecture can define them to verify that clobber caller-saved registers aren't changed by the implicit TLSDESC call. */ +#ifndef INIT_TLSDESC_CALL +# define INIT_TLSDESC_CALL() +#endif + #ifndef BEFORE_TLSDESC_CALL # define BEFORE_TLSDESC_CALL() #endif diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c index 45556a0e17..3fe3c14277 100644 --- a/elf/tst-gnu2-tls2mod0.c +++ b/elf/tst-gnu2-tls2mod0.c @@ -16,13 +16,14 @@ License along with the GNU C Library; if not, see . */ -#include "tst-gnu2-tls2.h" +#include __thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); struct tls * apply_tls (struct tls *p) { + INIT_TLSDESC_CALL (); BEFORE_TLSDESC_CALL (); tls_var0 = *p; struct tls *ret = &tls_var0; diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c index e10b9dbc0a..e210538468 100644 --- a/elf/tst-gnu2-tls2mod1.c +++ b/elf/tst-gnu2-tls2mod1.c @@ -16,13 +16,14 @@ License along with the GNU C Library; if not, see . 
*/ -#include "tst-gnu2-tls2.h" +#include __thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden"))); struct tls * apply_tls (struct tls *p) { + INIT_TLSDESC_CALL (); BEFORE_TLSDESC_CALL (); tls_var1[1] = *p; struct tls *ret = &tls_var1[1]; diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c index 141af51e55..6d3031dc5f 100644 --- a/elf/tst-gnu2-tls2mod2.c +++ b/elf/tst-gnu2-tls2mod2.c @@ -16,13 +16,14 @@ License along with the GNU C Library; if not, see . */ -#include "tst-gnu2-tls2.h" +#include __thread struct tls tls_var2 __attribute__ ((visibility ("hidden"))); struct tls * apply_tls (struct tls *p) { + INIT_TLSDESC_CALL (); BEFORE_TLSDESC_CALL (); tls_var2 = *p; struct tls *ret = &tls_var2; diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure index 35e2918922..4ef4d46cbd 100644 --- a/sysdeps/arm/configure +++ b/sysdeps/arm/configure @@ -187,6 +187,38 @@ else default-abi = soft" fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5 +printf %s "checking whether VFP supports 32 registers... " >&6; } +if test ${libc_cv_arm_pcs_vfp_d32+y} +then : + printf %s "(cached) " >&6 +else $as_nop + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +void foo (void) +{ + asm volatile ("vldr d16,=17" : : : "d16"); +} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + libc_cv_arm_pcs_vfp_d32=yes +else $as_nop + libc_cv_arm_pcs_vfp_d32=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5 +printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; } +if test "$libc_cv_arm_pcs_vfp_d32" = yes ; +then + printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h + +fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5 printf %s "checking whether PC-relative relocs in movw/movt work properly... 
" >&6; } if test ${libc_cv_arm_pcrel_movw+y} diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac index 5172e30bbe..cd00ddc9d9 100644 --- a/sysdeps/arm/configure.ac +++ b/sysdeps/arm/configure.ac @@ -21,6 +21,21 @@ else LIBC_CONFIG_VAR([default-abi], [soft]) fi +AC_CACHE_CHECK([whether VFP supports 32 registers], + libc_cv_arm_pcs_vfp_d32, [ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +void foo (void) +{ + asm volatile ("vldr d16,=17" : : : "d16"); +} +]])], + [libc_cv_arm_pcs_vfp_d32=yes], + [libc_cv_arm_pcs_vfp_d32=no])]) +if test "$libc_cv_arm_pcs_vfp_d32" = yes ; +then + AC_DEFINE(HAVE_ARM_PCS_VFP_D32) +fi + AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly], libc_cv_arm_pcrel_movw, [ cat > conftest.s <<\EOF diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S index 764c56e70f..ada106521d 100644 --- a/sysdeps/arm/dl-tlsdesc.S +++ b/sysdeps/arm/dl-tlsdesc.S @@ -19,6 +19,7 @@ #include #include #include +#include #include "tlsdesc.h" .text @@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp) .align 2 _dl_tlsdesc_dynamic: /* Our calling convention is to clobber r0, r1 and the processor - flags. All others that are modified must be saved */ - eabi_save ({r2,r3,r4,lr}) - push {r2,r3,r4,lr} - cfi_adjust_cfa_offset (16) + flags. All others that are modified must be saved. r5 is + used as the hwcap value to avoid reload after __tls_get_addr + call. If required we will save the vector register on the slow + path. */ + eabi_save ({r2,r3,r4,r5,ip,lr}) + push {r2,r3,r4,r5,ip,lr} + cfi_adjust_cfa_offset (24) cfi_rel_offset (r2,0) cfi_rel_offset (r3,4) cfi_rel_offset (r4,8) - cfi_rel_offset (lr,12) + cfi_rel_offset (r5,12) + cfi_rel_offset (ip,16) + cfi_rel_offset (lr,20) + ldr r1, [r0] /* td */ GET_TLS (lr) mov r4, r0 /* r4 = tp */ @@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic: rsbne r0, r4, r3 bne 2f 1: mov r0, r1 + + /* Load the hwcap to check for vector support. 
*/ + ldr r2, 3f + ldr r1, .Lrtld_global_ro +0: add r2, pc, r2 + ldr r2, [r2, r1] + ldr r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET] + +#ifdef __SOFTFP__ + tst r5, #HWCAP_ARM_VFP + beq .Lno_vfp +#endif + + /* Store the VFP registers. Don't use VFP instructions directly + because this code is used in non-VFP multilibs. */ +#define VFP_STACK_REQ (32*8 + 8) + sub sp, sp, VFP_STACK_REQ + cfi_adjust_cfa_offset (VFP_STACK_REQ) + mov r3, sp + .inst 0xeca30b20 /* vstmia r3!, {d0-d15} */ + tst r5, #HWCAP_ARM_VFPD32 + beq 4f + .inst 0xece30b20 /* vstmia r3!, {d16-d31} */ + /* Store the floating-point status register. */ +4: .inst 0xeef12a10 /* vmrs r2, fpscr */ + str r2, [r3] +.Lno_vfp: bl __tls_get_addr rsb r0, r4, r0 +#ifdef __SOFTFP__ + tst r5, #HWCAP_ARM_VFP + beq 2f +#endif + mov r3, sp + .inst 0xecb30b20 /* vldmia r3!, {d0-d15} */ + tst r5, #HWCAP_ARM_VFPD32 + beq 5f + .inst 0xecf30b20 /* vldmia r3!, {d16-d31} */ + ldr r4, [r3] +5: .inst 0xeee14a10 /* vmsr fpscr, r4 */ + add sp, sp, VFP_STACK_REQ + cfi_adjust_cfa_offset (-VFP_STACK_REQ) + 2: #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ || defined (ARM_ALWAYS_BX)) - pop {r2,r3,r4, lr} - cfi_adjust_cfa_offset (-16) + pop {r2,r3,r4,r5,ip, lr} + cfi_adjust_cfa_offset (-20) cfi_restore (lr) + cfi_restore (ip) + cfi_restore (r5) cfi_restore (r4) cfi_restore (r3) cfi_restore (r2) bx lr #else - pop {r2,r3,r4, pc} + pop {r2,r3,r4,r5,ip, pc} #endif eabi_fnend cfi_endproc .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + +3: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS +.Lrtld_global_ro: + .long C_SYMBOL_NAME(_rtld_global_ro)(GOT) #endif /* SHARED */ diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h new file mode 100644 index 0000000000..e413ac21fb --- /dev/null +++ b/sysdeps/arm/tst-gnu2-tls2.h @@ -0,0 +1,128 @@ +/* Test TLSDESC relocation. ARM version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +#ifndef __SOFTFP__ + +# ifdef HAVE_ARM_PCS_VFP_D32 +# define SAVE_VFP_D32 \ + asm volatile ("vldr d16,=17" : : : "d16"); \ + asm volatile ("vldr d17,=18" : : : "d17"); \ + asm volatile ("vldr d18,=19" : : : "d18"); \ + asm volatile ("vldr d19,=20" : : : "d19"); \ + asm volatile ("vldr d20,=21" : : : "d20"); \ + asm volatile ("vldr d21,=22" : : : "d21"); \ + asm volatile ("vldr d22,=23" : : : "d22"); \ + asm volatile ("vldr d23,=24" : : : "d23"); \ + asm volatile ("vldr d24,=25" : : : "d24"); \ + asm volatile ("vldr d25,=26" : : : "d25"); \ + asm volatile ("vldr d26,=27" : : : "d26"); \ + asm volatile ("vldr d27,=28" : : : "d27"); \ + asm volatile ("vldr d28,=29" : : : "d28"); \ + asm volatile ("vldr d29,=30" : : : "d29"); \ + asm volatile ("vldr d30,=31" : : : "d30"); \ + asm volatile ("vldr d31,=32" : : : "d31"); +# else +# define SAVE_VFP_D32 +# endif + +# define INIT_TLSDESC_CALL() \ + unsigned long hwcap = getauxval (AT_HWCAP) + +/* Set each vector register to a value from 1 to 32 before the TLS access, + dump to memory after TLS access, and compare with the expected values. 
*/ + +# define BEFORE_TLSDESC_CALL() \ + if (hwcap & HWCAP_ARM_VFP) \ + { \ + asm volatile ("vldr d0,=1" : : : "d0"); \ + asm volatile ("vldr d1,=2" : : : "d1"); \ + asm volatile ("vldr d2,=3" : : : "d1"); \ + asm volatile ("vldr d3,=4" : : : "d3"); \ + asm volatile ("vldr d4,=5" : : : "d4"); \ + asm volatile ("vldr d5,=6" : : : "d5"); \ + asm volatile ("vldr d6,=7" : : : "d6"); \ + asm volatile ("vldr d7,=8" : : : "d7"); \ + asm volatile ("vldr d8,=9" : : : "d8"); \ + asm volatile ("vldr d9,=10" : : : "d9"); \ + asm volatile ("vldr d10,=11" : : : "d10"); \ + asm volatile ("vldr d11,=12" : : : "d11"); \ + asm volatile ("vldr d12,=13" : : : "d12"); \ + asm volatile ("vldr d13,=14" : : : "d13"); \ + asm volatile ("vldr d14,=15" : : : "d14"); \ + asm volatile ("vldr d15,=16" : : : "d15"); \ + } \ + if (hwcap & HWCAP_ARM_VFPD32) \ + { \ + SAVE_VFP_D32 \ + } + +# define VFP_STACK_REQ (16*8) +# if __BYTE_ORDER == __BIG_ENDIAN +# define DISP 7 +# else +# define DISP 0 +# endif + +# ifdef HAVE_ARM_PCS_VFP_D32 +# define CHECK_VFP_D32 \ + char vfp[VFP_STACK_REQ]; \ + asm volatile ("vstmia %0, {d16-d31}\n" \ + : \ + : "r" (vfp) \ + : "memory"); \ + \ + char expected[VFP_STACK_REQ] = { 0 }; \ + for (int i = 0; i < 16; ++i) \ + expected[i * 8 + DISP] = i + 17; \ + \ + if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \ + abort (); +# else +# define CHECK_VFP_D32 +# endif + +# define AFTER_TLSDESC_CALL() \ + if (hwcap & HWCAP_ARM_VFP) \ + { \ + char vfp[VFP_STACK_REQ]; \ + asm volatile ("vstmia %0, {d0-d15}\n" \ + : \ + : "r" (vfp) \ + : "memory"); \ + \ + char expected[VFP_STACK_REQ] = { 0 }; \ + for (int i = 0; i < 16; ++i) \ + expected[i * 8 + DISP] = i + 1; \ + \ + if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \ + abort (); \ + } \ + if (hwcap & HWCAP_ARM_VFPD32) \ + { \ + CHECK_VFP_D32 \ + } + +#endif /* __SOFTFP__ */ + +#include_next commit aded2fc004e7ee85cf0b45b1382552d41e555a23 Author: Adhemerval Zanella Date: Tue Mar 12 13:21:20 2024 -0300 elf: Enable TLS descriptor 
tests on aarch64 The aarch64 uses 'trad' for traditional tls and 'desc' for tls descriptors, but unlike other targets it defaults to 'desc'. The gnutls2 configure check does not set aarch64 as an ABI that uses TLS descriptors, which then disables some tests. Also rename the internal machinery from gnu2 to tls descriptors. Checked on aarch64-linux-gnu. Reviewed-by: H.J. Lu (cherry picked from commit 3d53d18fc71c5d9ef4773b8bce04d54b80181926) diff --git a/configure b/configure index 117b48a421..432e40a592 100755 --- a/configure +++ b/configure @@ -653,7 +653,7 @@ LIBGD libc_cv_cc_loop_to_function libc_cv_cc_submachine libc_cv_cc_nofma -libc_cv_mtls_dialect_gnu2 +libc_cv_mtls_descriptor libc_cv_has_glob_dat libc_cv_fpie libc_cv_z_execstack @@ -4760,6 +4760,9 @@ libc_config_ok=no # whether to use such directories. with_fp_cond=1 +# A preconfigure script may define another name to TLS descriptor variant +mtls_descriptor=gnu2 + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 @@ -7006,9 +7009,9 @@ fi printf "%s\n" "$libc_cv_has_glob_dat" >&6; } -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5 -printf %s "checking for -mtls-dialect=gnu2... " >&6; } -if test ${libc_cv_mtls_dialect_gnu2+y} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5 +printf %s "checking for tls descriptor support... 
" >&6; } +if test ${libc_cv_mtls_descriptor+y} then : printf %s "(cached) " >&6 else $as_nop @@ -7019,7 +7022,7 @@ void foo (void) i = 10; } EOF -if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles -shared conftest.c -o conftest 1>&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 (eval $ac_try) 2>&5 @@ -7027,17 +7030,17 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nost printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; } then - libc_cv_mtls_dialect_gnu2=yes + libc_cv_mtls_descriptor=$mtls_descriptor else - libc_cv_mtls_dialect_gnu2=no + libc_cv_mtls_descriptor=no fi rm -f conftest* fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5 -printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5 +printf "%s\n" "$libc_cv_mtls_descriptor" >&6; } config_vars="$config_vars -have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2" +have-mtls-descriptor = $libc_cv_mtls_descriptor" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5 printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; } diff --git a/configure.ac b/configure.ac index 19b88a47a5..bdc385d03c 100644 --- a/configure.ac +++ b/configure.ac @@ -442,6 +442,9 @@ libc_config_ok=no # whether to use such directories. with_fp_cond=1 +# A preconfigure script may define another name to TLS descriptor variant +mtls_descriptor=gnu2 + dnl Let sysdeps/*/preconfigure act here. 
LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) @@ -1287,7 +1290,7 @@ fi rm -f conftest*]) AC_SUBST(libc_cv_has_glob_dat) -AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2, +AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor, [dnl cat > conftest.c <&AS_MESSAGE_LOG_FD]) then - libc_cv_mtls_dialect_gnu2=yes + libc_cv_mtls_descriptor=$mtls_descriptor else - libc_cv_mtls_dialect_gnu2=no + libc_cv_mtls_descriptor=no fi rm -f conftest*]) -AC_SUBST(libc_cv_mtls_dialect_gnu2) -LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2]) +AC_SUBST(libc_cv_mtls_descriptor) +LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor]) dnl clang emits an warning for a double alias redirection, to warn the dnl original symbol is sed even when weak definition overrides it. diff --git a/elf/Makefile b/elf/Makefile index 030db4d207..69aa423c4b 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -999,13 +999,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\ # For +depfiles in Makerules. extra-test-objs += tst-auditmod17.os -ifeq (yes,$(have-mtls-dialect-gnu2)) +ifneq (no,$(have-mtls-descriptor)) tests += tst-gnu2-tls1 modules-names += tst-gnu2-tls1mod $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so tst-gnu2-tls1mod.so-no-z-defs = yes -CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2 -endif # $(have-mtls-dialect-gnu2) +CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor) +endif # $(have-mtls-descriptor) ifeq (yes,$(have-protected-data)) modules-names += tst-protected1moda tst-protected1modb @@ -2972,11 +2972,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \ $(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \ $(objpfx)tst-audit-tlsdesc-mod2.so \ $(shared-thread-library) -ifeq (yes,$(have-mtls-dialect-gnu2)) +ifneq (no,$(have-mtls-descriptor)) # The test is valid for all TLS types, but we want to exercise GNU2 # TLS if possible. 
-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2 -CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2 +CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor) endif $(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library) $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \ @@ -3055,11 +3055,11 @@ $(objpfx)tst-gnu2-tls2.out: \ $(objpfx)tst-gnu2-tls2mod1.so \ $(objpfx)tst-gnu2-tls2mod2.so -ifeq (yes,$(have-mtls-dialect-gnu2)) -CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2 -CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 -CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 -CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2 -CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2 -CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2 +ifneq (no,$(have-mtls-descriptor)) +CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor) endif diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure index d9bd1f8558..19657b627b 100644 --- a/sysdeps/aarch64/preconfigure +++ b/sysdeps/aarch64/preconfigure @@ -2,5 +2,6 @@ case "$machine" in aarch64*) base_machine=aarch64 machine=aarch64 + mtls_descriptor=desc ;; esac diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile index d5cea717a9..619474eca9 100644 --- a/sysdeps/arm/Makefile +++ b/sysdeps/arm/Makefile @@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os lib-noranlib: $(objpfx)libgcc-stubs.a ifeq ($(build-shared),yes) -ifeq (yes,$(have-mtls-dialect-gnu2)) +ifneq (no,$(have-mtls-descriptor)) tests += tst-armtlsdescloc 
tst-armtlsdescextnow tst-armtlsdescextlazy modules-names += tst-armtlsdesclocmod modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic= CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic= -CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2 -CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2 -CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2 +CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor) +CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor) LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now tst-armtlsdescloc-ENV = LD_BIND_NOW=1 tst-armtlsdescextnow-ENV = LD_BIND_NOW=1 commit 5a461f2949ded98d8211939f84988bc464c7b4fe Author: Andreas Schwab Date: Tue Mar 19 13:49:50 2024 +0100 Add tst-gnu2-tls2mod1 to test-internal-extras That allows sysdeps/x86_64/tst-gnu2-tls2mod1.S to use internal headers. Fixes: 717ebfa85c ("x86-64: Allocate state buffer space for RDI, RSI and RBX") (cherry picked from commit fd7ee2e6c5eb49e4a630a9978b4d668bff6354ee) diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index e8babc9a4e..9d374a3299 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so endif +test-internal-extras += tst-gnu2-tls2mod1 + endif # $(subdir) == elf ifeq ($(subdir),csu) commit aa4249266e9906c4bc833e4847f4d8feef59504f Author: Adhemerval Zanella Date: Thu Feb 8 10:08:38 2024 -0300 x86: Fix Zen3/Zen4 ERMS selection (BZ 30994) The REP MOVSB usage on memcpy/memmove does not show much performance improvement on Zen3/Zen4 cores compared to the vectorized loops. Also, as from BZ 30994, if the source is aligned and the destination is not the performance can be 20x slower. 
The performance difference is noticeable with small buffer sizes, closer to the lower bounds limits when memcpy/memmove starts to use ERMS. The performance of REP MOVSB is similar to vectorized instruction on the size limit (the L2 cache). Also, there is no drawback to multiple cores sharing the cache. Checked on x86_64-linux-gnu on Zen3. Reviewed-by: H.J. Lu (cherry picked from commit 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e) diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index d5101615e3..f34d12846c 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) long int data = -1; long int shared = -1; long int shared_per_thread = -1; - long int core = -1; unsigned int threads = 0; unsigned long int level1_icache_size = -1; unsigned long int level1_icache_linesize = -1; @@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (cpu_features->basic.kind == arch_kind_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features); - core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features); shared_per_thread = shared; @@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) = handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features); level1_dcache_linesize = handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features); - level2_cache_size = core; + level2_cache_size + = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); level2_cache_assoc = handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features); level2_cache_linesize @@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) level4_cache_size = handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features); - get_common_cache_info (&shared, &shared_per_thread, &threads, core); + get_common_cache_info (&shared, &shared_per_thread, &threads, + level2_cache_size); } else if (cpu_features->basic.kind == arch_kind_zhaoxin) 
{ data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE); - core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE); shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE); shared_per_thread = shared; @@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) level1_dcache_size = data; level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC); level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE); - level2_cache_size = core; + level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE); level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC); level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE); level3_cache_size = shared; level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC); level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE); - get_common_cache_info (&shared, &shared_per_thread, &threads, core); + get_common_cache_info (&shared, &shared_per_thread, &threads, + level2_cache_size); } else if (cpu_features->basic.kind == arch_kind_amd) { data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); - core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); @@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) level1_dcache_size = data; level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC); level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE); - level2_cache_size = core; + level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);; level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC); level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE); level3_cache_size = shared; @@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (shared <= 0) { /* No shared L3 cache. All we have is the L2 cache. */ - shared = core; + shared = level2_cache_size; } else if (cpu_features->basic.family < 0x17) { /* Account for exclusive L2 and L3 caches. 
*/ - shared += core; + shared += level2_cache_size; } shared_per_thread = shared; @@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) rep_movsb_threshold = 2112; + /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of + cases slower than the vectorized path (and for some alignments, + it is really slow, check BZ #30994). */ + if (cpu_features->basic.kind == arch_kind_amd) + rep_movsb_threshold = non_temporal_threshold; + /* The default threshold to use Enhanced REP STOSB. */ unsigned long int rep_stosb_threshold = 2048; @@ -1028,16 +1033,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) SIZE_MAX); unsigned long int rep_movsb_stop_threshold; - /* ERMS feature is implemented from AMD Zen3 architecture and it is - performing poorly for data above L2 cache size. Henceforth, adding - an upper bound threshold parameter to limit the usage of Enhanced - REP MOVSB operations and setting its value to L2 cache size. */ - if (cpu_features->basic.kind == arch_kind_amd) - rep_movsb_stop_threshold = core; /* Setting the upper bound of ERMS to the computed value of - non-temporal threshold for architectures other than AMD. */ - else - rep_movsb_stop_threshold = non_temporal_threshold; + non-temporal threshold for all architectures. */ + rep_movsb_stop_threshold = non_temporal_threshold; cpu_features->data_cache_size = data; cpu_features->shared_cache_size = shared; commit 6484a92698039c4a7a510f0214e22d067b0d78b3 Author: Adhemerval Zanella Date: Thu Feb 8 10:08:39 2024 -0300 x86: Do not prefer ERMS for memset on Zen3+ For AMD Zen3+ architecture, the performance of the vectorized loop is slightly better than ERMS. Checked on x86_64-linux-gnu on Zen3. Reviewed-by: H.J. 
Lu (cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567) diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index f34d12846c..5a98f70364 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) minimum value is fixed. */ rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold, long int, NULL); + if (cpu_features->basic.kind == arch_kind_amd + && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold)) + /* For AMD Zen3+ architecture, the performance of the vectorized loop is + slightly better than ERMS. */ + rep_stosb_threshold = SIZE_MAX; TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX); TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX); commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae Author: Adhemerval Zanella Date: Thu Feb 8 10:08:40 2024 -0300 x86: Expand the comment on when REP STOSB is used on memset Reviewed-by: H.J. Lu (cherry picked from commit 491e55beab7457ed310a4a47496f4a333c5d1032) diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index 9984c3ca0f..97839a2248 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -21,7 +21,9 @@ 2. If size is less than VEC, use integer register stores. 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores. 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores. - 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with + 5. On machines ERMS feature, if size is greater or equal than + __x86_rep_stosb_threshold then REP STOSB will be used. + 6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with 4 VEC stores and store 4 * VEC at a time until done. 
*/ #include commit 31c7d69af59da0da80caa74b2ec6ae149013384d Author: Florian Weimer Date: Fri Feb 16 07:40:37 2024 +0100 i386: Use generic memrchr in libc (bug 31316) Before this change, we incorrectly used the SSE2 variant in the implementation, without checking that the system actually supports SSE2. Tested-by: Sam James (cherry picked from commit 0d9166c2245cad4ac520b337dee40c9a583872b6) diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c index ef7bbbe792..20bfdf3af3 100644 --- a/sysdeps/i386/i686/multiarch/memrchr-c.c +++ b/sysdeps/i386/i686/multiarch/memrchr-c.c @@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t); #endif #include "string/memrchr.c" +strong_alias (__memrchr_ia32, __GI___memrchr) diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S index d9dae04171..e123f87435 100644 --- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S +++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S @@ -720,5 +720,4 @@ L(ret_null): ret END (__memrchr_sse2) -strong_alias (__memrchr_sse2, __GI___memrchr) #endif commit b0e0a07018098c2c5927796be5681a298c312626 Author: Joe Ramsay Date: Tue Feb 20 16:44:13 2024 +0000 aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR This includes a fix for big-endian in AdvSIMD log, some cosmetic changes, and numerous small optimisations mainly around inlining and using indexed variants of MLA intrinsics. Reviewed-by: Adhemerval Zanella (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e) diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c index a8eabb5e71..0a86c9823a 100644 --- a/sysdeps/aarch64/fpu/acos_advsimd.c +++ b/sysdeps/aarch64/fpu/acos_advsimd.c @@ -40,8 +40,8 @@ static const struct data }; #define AllMask v_u64 (0xffffffffffffffff) -#define Oneu (0x3ff0000000000000) -#define Small (0x3e50000000000000) /* 2^-53. 
*/ +#define Oneu 0x3ff0000000000000 +#define Small 0x3e50000000000000 /* 2^-53. */ #if WANT_SIMD_EXCEPT static float64x2_t VPCS_ATTR NOINLINE diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c index 141646e954..2de6eff407 100644 --- a/sysdeps/aarch64/fpu/asin_advsimd.c +++ b/sysdeps/aarch64/fpu/asin_advsimd.c @@ -39,8 +39,8 @@ static const struct data }; #define AllMask v_u64 (0xffffffffffffffff) -#define One (0x3ff0000000000000) -#define Small (0x3e50000000000000) /* 2^-12. */ +#define One 0x3ff0000000000000 +#define Small 0x3e50000000000000 /* 2^-12. */ #if WANT_SIMD_EXCEPT static float64x2_t VPCS_ATTR NOINLINE diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c index 09a4c559b8..04fa71fa37 100644 --- a/sysdeps/aarch64/fpu/atan2_sve.c +++ b/sysdeps/aarch64/fpu/atan2_sve.c @@ -37,9 +37,6 @@ static const struct data .pi_over_2 = 0x1.921fb54442d18p+0, }; -/* Useful constants. */ -#define SignMask sv_u64 (0x8000000000000000) - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ static svfloat64_t NOINLINE special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, @@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) svbool_t cmp_y = zeroinfnan (iy, pg); svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); - svuint64_t sign_x = svand_x (pg, ix, SignMask); - svuint64_t sign_y = svand_x (pg, iy, SignMask); - svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); - svfloat64_t ax = svabs_x (pg, x); svfloat64_t ay = svabs_x (pg, y); + svuint64_t iax = svreinterpret_u64 (ax); + svuint64_t iay = svreinterpret_u64 (ay); + + svuint64_t sign_x = sveor_x (pg, ix, iax); + svuint64_t sign_y = sveor_x (pg, iy, iay); + svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); - svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); svbool_t pred_aygtax = svcmpgt (pg, ay, ax); /* Set up z for call to atan. 
*/ @@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) svfloat64_t z = svdiv_x (pg, n, d); /* Work out the correct shift. */ - svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0)); - shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); + svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); + shift = svsel (pred_aygtax, sv_f64 (1.0), shift); + shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); shift = svmul_x (pg, shift, data_ptr->pi_over_2); /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ @@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) ret = svadd_m (pg, ret, shift); /* Account for the sign of x and y. */ - ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); - if (__glibc_unlikely (svptest_any (pg, cmp_xy))) - return special_case (y, x, ret, cmp_xy); - - return ret; + return special_case ( + y, x, + svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)), + cmp_xy); + return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); } diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c index b92f83cdea..9ea197147c 100644 --- a/sysdeps/aarch64/fpu/atan2f_sve.c +++ b/sysdeps/aarch64/fpu/atan2f_sve.c @@ -32,10 +32,8 @@ static const struct data .pi_over_2 = 0x1.921fb6p+0f, }; -#define SignMask sv_u32 (0x80000000) - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). 
*/ -static inline svfloat32_t +static svfloat32_t NOINLINE special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret, const svbool_t cmp) { @@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) svbool_t cmp_y = zeroinfnan (iy, pg); svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); - svuint32_t sign_x = svand_x (pg, ix, SignMask); - svuint32_t sign_y = svand_x (pg, iy, SignMask); - svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); - svfloat32_t ax = svabs_x (pg, x); svfloat32_t ay = svabs_x (pg, y); + svuint32_t iax = svreinterpret_u32 (ax); + svuint32_t iay = svreinterpret_u32 (ay); + + svuint32_t sign_x = sveor_x (pg, ix, iax); + svuint32_t sign_y = sveor_x (pg, iy, iay); + svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); - svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); svbool_t pred_aygtax = svcmpgt (pg, ay, ax); /* Set up z for call to atan. */ @@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) svfloat32_t z = svdiv_x (pg, n, d); /* Work out the correct shift. */ - svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0)); - shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); + svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); + shift = svsel (pred_aygtax, sv_f32 (1.0), shift); + shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); - /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ + /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ svfloat32_t z2 = svmul_x (pg, z, z); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t z8 = svmul_x (pg, z4, z4); @@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) ret = svadd_m (pg, ret, shift); /* Account for the sign of x and y. 
*/ - ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); if (__glibc_unlikely (svptest_any (pg, cmp_xy))) - return special_case (y, x, ret, cmp_xy); + return special_case ( + y, x, + svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)), + cmp_xy); - return ret; + return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); } diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c index 2897e8b909..3924c9ce44 100644 --- a/sysdeps/aarch64/fpu/cos_advsimd.c +++ b/sysdeps/aarch64/fpu/cos_advsimd.c @@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) special-case handler later. */ r = vbslq_f64 (cmp, v_f64 (1.0), r); #else - cmp = vcageq_f64 (d->range_val, x); - cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ + cmp = vcageq_f64 (x, d->range_val); r = x; #endif diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c index 60abc8dfcf..d0c285b03a 100644 --- a/sysdeps/aarch64/fpu/cosf_advsimd.c +++ b/sysdeps/aarch64/fpu/cosf_advsimd.c @@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) special-case handler later. */ r = vbslq_f32 (cmp, v_f32 (1.0f), r); #else - cmp = vcageq_f32 (d->range_val, x); - cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ + cmp = vcageq_f32 (x, d->range_val); r = x; #endif diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c index fe7149b191..eeb31ca839 100644 --- a/sysdeps/aarch64/fpu/exp10_advsimd.c +++ b/sysdeps/aarch64/fpu/exp10_advsimd.c @@ -57,7 +57,7 @@ const static struct data # define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */ # define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. 
*/ -static inline float64x2_t VPCS_ATTR +static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar @@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ -static float64x2_t VPCS_ATTR NOINLINE +static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c index 7ee0c90948..ab117b69da 100644 --- a/sysdeps/aarch64/fpu/exp10f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c @@ -25,7 +25,8 @@ static const struct data { float32x4_t poly[5]; - float32x4_t shift, log10_2, log2_10_hi, log2_10_lo; + float32x4_t log10_2_and_inv, shift; + #if !WANT_SIMD_EXCEPT float32x4_t scale_thresh; #endif @@ -38,9 +39,9 @@ static const struct data .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, .shift = V4 (0x1.8p23f), - .log10_2 = V4 (0x1.a934fp+1), - .log2_10_hi = V4 (0x1.344136p-2), - .log2_10_lo = V4 (-0x1.ec10cp-27), + + /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */ + .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, #if !WANT_SIMD_EXCEPT .scale_thresh = V4 (ScaleBound) #endif @@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) #if WANT_SIMD_EXCEPT /* asuint(x) - TinyBound >= BigBound - TinyBound. 
*/ uint32x4_t cmp = vcgeq_u32 ( - vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), - TinyBound), - Thres); + vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (__glibc_unlikely (v_any_u32 (cmp))) - x = vbslq_f32 (cmp, v_f32 (1), x); + x = v_zerofy_f32 (x, cmp); #endif /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */ - float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2); + float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0); float32x4_t n = vsubq_f32 (z, d->shift); - float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi); - r = vfmsq_f32 (r, n, d->log2_10_lo); + float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1); + r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2); uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c index 391a93180c..ae1e63d503 100644 --- a/sysdeps/aarch64/fpu/exp2_advsimd.c +++ b/sysdeps/aarch64/fpu/exp2_advsimd.c @@ -24,6 +24,7 @@ #define IndexMask (N - 1) #define BigBound 1022.0 #define UOFlowBound 1280.0 +#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ static const struct data { @@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i) #if WANT_SIMD_EXCEPT -# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ /* Call scalar exp2 as a fallback. 
*/ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x) +special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) { - return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff)); + return v_call_f64 (exp2, x, y, is_special); } #else @@ -65,7 +65,7 @@ special_case (float64x2_t x) # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ -static float64x2_t VPCS_ATTR +static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { @@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) #if WANT_SIMD_EXCEPT uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); - /* If any special case (inf, nan, small and large x) is detected, - fall back to scalar for all lanes. */ - if (__glibc_unlikely (v_any_u64 (cmp))) - return special_case (x); + /* Mask special lanes and retain a copy of x for passing to special-case + handler. */ + float64x2_t xc = x; + x = v_zerofy_f64 (x, cmp); #else cmp = vcagtq_f64 (x, d->scale_big_bound); #endif @@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); y = vmulq_f64 (r, y); -#if !WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u64 (cmp))) +#if !WANT_SIMD_EXCEPT return special_case (s, y, n, d); +#else + return special_case (xc, vfmaq_f64 (s, s, y), cmp); #endif return vfmaq_f64 (s, s, y); } diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index 9a5a523a10..8a686e3e05 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -20,6 +20,8 @@ #include "sv_math.h" #include "poly_sve_f32.h" +#define Thres 0x1.5d5e2ap+6f + static const struct data { float poly[5]; @@ -33,7 +35,7 @@ static const struct data .shift = 0x1.903f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled correctly by FEXPA. 
*/ - .thres = 0x1.5d5e2ap+6f, + .thres = Thres, }; static svfloat32_t NOINLINE diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c index fd215f1d2c..5e3a9a0d44 100644 --- a/sysdeps/aarch64/fpu/exp_advsimd.c +++ b/sysdeps/aarch64/fpu/exp_advsimd.c @@ -54,7 +54,7 @@ const static volatile struct # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */ # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. */ -static inline float64x2_t VPCS_ATTR +static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar @@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ -static float64x2_t VPCS_ATTR NOINLINE +static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n) { /* 2^(n/N) may overflow, break it up into s1*s2. 
*/ diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c index 0b85bd06f3..3628398674 100644 --- a/sysdeps/aarch64/fpu/expm1_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1_advsimd.c @@ -23,7 +23,7 @@ static const struct data { float64x2_t poly[11]; - float64x2_t invln2, ln2_lo, ln2_hi, shift; + float64x2_t invln2, ln2, shift; int64x2_t exponent_bias; #if WANT_SIMD_EXCEPT uint64x2_t thresh, tiny_bound; @@ -38,8 +38,7 @@ static const struct data V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) }, .invln2 = V2 (0x1.71547652b82fep0), - .ln2_hi = V2 (0x1.62e42fefa39efp-1), - .ln2_lo = V2 (0x1.abc9e3b39803fp-56), + .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, .shift = V2 (0x1.8p52), .exponent_bias = V2 (0x3ff0000000000000), #if WANT_SIMD_EXCEPT @@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) x = v_zerofy_f64 (x, special); #else /* Large input, NaNs and Infs. */ - uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound)); + uint64x2_t special = vcageq_f64 (x, d->oflow_bound); #endif /* Reduce argument to smaller range: @@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) where 2^i is exact because i is an integer. */ float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift); int64x2_t i = vcvtq_s64_f64 (n); - float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi); - f = vfmsq_f64 (f, n, d->ln2_lo); + float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0); + f = vfmsq_laneq_f64 (f, n, d->ln2, 1); /* Approximate expm1(f) using polynomial. 
Taylor expansion for expm1(x) has the form: diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index 8d4c9a2193..93db200f61 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -23,7 +23,8 @@ static const struct data { float32x4_t poly[5]; - float32x4_t invln2, ln2_lo, ln2_hi, shift; + float32x4_t invln2_and_ln2; + float32x4_t shift; int32x4_t exponent_bias; #if WANT_SIMD_EXCEPT uint32x4_t thresh; @@ -34,9 +35,8 @@ static const struct data /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - .invln2 = V4 (0x1.715476p+0f), - .ln2_hi = V4 (0x1.62e4p-1f), - .ln2_lo = V4 (0x1.7f7d1cp-20f), + /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ + .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), #if !WANT_SIMD_EXCEPT @@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) x = v_zerofy_f32 (x, special); #else /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ - uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound)); + uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); #endif /* Reduce argument to smaller range: @@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. 
*/ - float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift); + float32x4_t j = vsubq_f32 ( + vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi); - f = vfmsq_f32 (f, j, d->ln2_lo); + float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); + f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c index 067ae79613..21df61728c 100644 --- a/sysdeps/aarch64/fpu/log_advsimd.c +++ b/sysdeps/aarch64/fpu/log_advsimd.c @@ -58,8 +58,13 @@ lookup (uint64x2_t i) uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); +#if __BYTE_ORDER == __LITTLE_ENDIAN e.invc = vuzp1q_f64 (e0, e1); e.logc = vuzp2q_f64 (e0, e1); +#else + e.invc = vuzp1q_f64 (e1, e0); + e.logc = vuzp2q_f64 (e1, e0); +#endif return e; } diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c index efce183e86..a0d9d3b819 100644 --- a/sysdeps/aarch64/fpu/sin_advsimd.c +++ b/sysdeps/aarch64/fpu/sin_advsimd.c @@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); #else r = x; - cmp = vcageq_f64 (d->range_val, x); - cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ + cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). 
*/ diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c index 60cf3f2ca1..375dfc3331 100644 --- a/sysdeps/aarch64/fpu/sinf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinf_advsimd.c @@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); #else r = x; - cmp = vcageq_f32 (d->range_val, x); - cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ + cmp = vcageq_f32 (x, d->range_val); #endif /* n = rint(|x|/pi) */ diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c index d7e5ba7b1a..0459821ab2 100644 --- a/sysdeps/aarch64/fpu/tan_advsimd.c +++ b/sysdeps/aarch64/fpu/tan_advsimd.c @@ -23,7 +23,7 @@ static const struct data { float64x2_t poly[9]; - float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift; + float64x2_t half_pi, two_over_pi, shift; #if !WANT_SIMD_EXCEPT float64x2_t range_val; #endif @@ -34,8 +34,7 @@ static const struct data V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9), V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11), V2 (0x1.4e4fd14147622p-12) }, - .half_pi_hi = V2 (0x1.921fb54442d18p0), - .half_pi_lo = V2 (0x1.1a62633145c07p-54), + .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, .two_over_pi = V2 (0x1.45f306dc9c883p-1), .shift = V2 (0x1.8p52), #if !WANT_SIMD_EXCEPT @@ -56,15 +55,15 @@ special_case (float64x2_t x) /* Vector approximation for double-precision tan. Maximum measured error is 3.48 ULP: - __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 - want -0x1.f6ccd8ecf7deap+37. */ + _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 + want -0x1.f6ccd8ecf7deap+37. */ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) { const struct data *dat = ptr_barrier (&data); - /* Our argument reduction cannot calculate q with sufficient accuracy for very - large inputs. Fall back to scalar routine for all lanes if any are too - large, or Inf/NaN. 
If fenv exceptions are expected, also fall back for tiny - input to avoid underflow. */ + /* Our argument reduction cannot calculate q with sufficient accuracy for + very large inputs. Fall back to scalar routine for all lanes if any are + too large, or Inf/NaN. If fenv exceptions are expected, also fall back for + tiny input to avoid underflow. */ #if WANT_SIMD_EXCEPT uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); /* iax - tiny_bound > range_val - tiny_bound. */ @@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ float64x2_t r = x; - r = vfmsq_f64 (r, q, dat->half_pi_hi); - r = vfmsq_f64 (r, q, dat->half_pi_lo); + r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); + r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ r = vmulq_n_f64 (r, 0.5); @@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) and reciprocity around pi/2: tan(x) = 1 / (tan(pi/2 - x)) to assemble result using change-of-sign and conditional selection of - numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */ + numerator/denominator, dependent on odd/even-ness of q (hence quadrant). 
+ */ float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p); float64x2_t d = vaddq_f64 (p, p); uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); #if !WANT_SIMD_EXCEPT - uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val)); + uint64x2_t special = vcageq_f64 (x, dat->range_val); if (__glibc_unlikely (v_any_u64 (special))) return special_case (x); #endif diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c index 1f16103f8a..5a7489390a 100644 --- a/sysdeps/aarch64/fpu/tanf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanf_advsimd.c @@ -23,7 +23,8 @@ static const struct data { float32x4_t poly[6]; - float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift; + float32x4_t pi_consts; + float32x4_t shift; #if !WANT_SIMD_EXCEPT float32x4_t range_val; #endif @@ -31,10 +32,9 @@ static const struct data /* Coefficients generated using FPMinimax. */ .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f), V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) }, - .neg_half_pi_1 = V4 (-0x1.921fb6p+0f), - .neg_half_pi_2 = V4 (0x1.777a5cp-25f), - .neg_half_pi_3 = V4 (0x1.ee59dap-50f), - .two_over_pi = V4 (0x1.45f306p-1f), + /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */ + .pi_consts + = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f }, .shift = V4 (0x1.8p+23f), #if !WANT_SIMD_EXCEPT .range_val = V4 (0x1p15f), @@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d) { float32x4_t z2 = vmulq_f32 (z, z); #if WANT_SIMD_EXCEPT - /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions - are to be triggered correctly, sidestep this by fixing such lanes to 0. */ + /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. + If fp exceptions are to be triggered correctly, + sidestep this by fixing such lanes to 0. 
*/ uint32x4_t will_uflow - = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); + = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); if (__glibc_unlikely (v_any_u32 (will_uflow))) z2 = vbslq_f32 (will_uflow, v_f32 (0), z2); #endif @@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) #endif /* n = rint(x/(pi/2)). */ - float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x); + float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3); float32x4_t n = vsubq_f32 (q, d->shift); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1)); /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). */ float32x4_t r; - r = vfmaq_f32 (x, d->neg_half_pi_1, n); - r = vfmaq_f32 (r, d->neg_half_pi_2, n); - r = vfmaq_f32 (r, d->neg_half_pi_3, n); + r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0); + r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1); + r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2); /* If x lives in an interval, where |tan(x)| - is finite, then use a polynomial approximation of the form commit 395a89f61e19fa916ae4cc93fc10d81a28ce3039 Author: Szabolcs Nagy Date: Wed Mar 13 14:34:14 2024 +0000 aarch64: fix check for SVE support in assembler Due to GCC bug 110901 -mcpu can override -march setting when compiling asm code and thus a compiler targetting a specific cpu can fail the configure check even when binutils gas supports SVE. The workaround is that explicit .arch directive overrides both -mcpu and -march, and since that's what the actual SVE memcpy uses the configure check should use that too even if the GCC issue is fixed independently. 
Reviewed-by: Florian Weimer (cherry picked from commit 73c26018ed0ecd9c807bb363cc2c2ab4aca66a82) diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure old mode 100644 new mode 100755 index ca57edce47..9606137e8d --- a/sysdeps/aarch64/configure +++ b/sysdeps/aarch64/configure @@ -325,9 +325,10 @@ then : printf %s "(cached) " >&6 else $as_nop cat > conftest.s <<\EOF - ptrue p0.b + .arch armv8.2-a+sve + ptrue p0.b EOF -if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5' +if { ac_try='${CC-cc} -c conftest.s 1>&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 (eval $ac_try) 2>&5 ac_status=$? diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac index 27874eceb4..56d12d661d 100644 --- a/sysdeps/aarch64/configure.ac +++ b/sysdeps/aarch64/configure.ac @@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) # Check if asm support armv8.2-a+sve AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl cat > conftest.s <<\EOF - ptrue p0.b + .arch armv8.2-a+sve + ptrue p0.b EOF -if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then +if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then libc_cv_aarch64_sve_asm=yes else libc_cv_aarch64_sve_asm=no commit 9d92452c70805a2e2dbbdb2b1ffc34bd86e1c8df Author: Wilco Dijkstra Date: Thu Mar 21 16:48:33 2024 +0000 AArch64: Check kernel version for SVE ifuncs Old Linux kernels disable SVE after every system call. Calling the SVE-optimized memcpy afterwards will then cause a trap to reenable SVE. As a result, applications with a high use of syscalls may run slower with the SVE memcpy. This is true for kernels between 4.15.0 and before 6.2.0, except for 5.14.0 which was patched. Avoid this by checking the kernel version and selecting the SVE ifunc on modern kernels. 
Parse the kernel version reported by uname() into a 24-bit kernel.major.minor value without calling any library functions. If uname() is not supported or if the version format is not recognized, assume the kernel is modern. Tested-by: Florian Weimer Reviewed-by: Szabolcs Nagy (cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2) diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h index 77a782422a..5f2da91ebb 100644 --- a/sysdeps/aarch64/cpu-features.h +++ b/sysdeps/aarch64/cpu-features.h @@ -71,6 +71,7 @@ struct cpu_features /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ uint8_t mte_state; bool sve; + bool prefer_sve_ifuncs; bool mops; }; diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h index c52860efb2..61dc40088f 100644 --- a/sysdeps/aarch64/multiarch/init-arch.h +++ b/sysdeps/aarch64/multiarch/init-arch.h @@ -36,5 +36,7 @@ MTE_ENABLED (); \ bool __attribute__((unused)) sve = \ GLRO(dl_aarch64_cpu_features).sve; \ + bool __attribute__((unused)) prefer_sve_ifuncs = \ + GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \ bool __attribute__((unused)) mops = \ GLRO(dl_aarch64_cpu_features).mops; diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index d12eccfca5..ce53567dab 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -47,7 +47,7 @@ select_memcpy_ifunc (void) { if (IS_A64FX (midr)) return __memcpy_a64fx; - return __memcpy_sve; + return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic; } if (IS_THUNDERX (midr)) diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c index 2081eeb4d4..fe95037be3 100644 --- a/sysdeps/aarch64/multiarch/memmove.c +++ b/sysdeps/aarch64/multiarch/memmove.c @@ -47,7 +47,7 @@ select_memmove_ifunc (void) { if (IS_A64FX (midr)) return __memmove_a64fx; - return __memmove_sve; + return prefer_sve_ifuncs ? 
__memmove_sve : __memmove_generic; } if (IS_THUNDERX (midr)) diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index b1a3f673f0..c0b047bc0d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #define DCZID_DZP_MASK (1 << 4) @@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu) return UINT64_MAX; } +#if __LINUX_KERNEL_VERSION < 0x060200 + +/* Return true if we prefer using SVE in string ifuncs. Old kernels disable + SVE after every system call which results in unnecessary traps if memcpy + uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except + for 5.14.0 which was patched. For these versions return false to avoid using + SVE ifuncs. + Parse the kernel version into a 24-bit kernel.major.minor value without + calling any library functions. If uname() is not supported or if the version + format is not recognized, assume the kernel is modern and return true. */ + +static inline bool +prefer_sve_ifuncs (void) +{ + struct utsname buf; + const char *p = &buf.release[0]; + int kernel = 0; + int val; + + if (__uname (&buf) < 0) + return true; + + for (int shift = 16; shift >= 0; shift -= 8) + { + for (val = 0; *p >= '0' && *p <= '9'; p++) + val = val * 10 + *p - '0'; + kernel |= (val & 255) << shift; + if (*p++ != '.') + break; + } + + if (kernel >= 0x060200 || kernel == 0x050e00) + return true; + if (kernel >= 0x040f00) + return false; + return true; +} + +#endif + static inline void init_cpu_features (struct cpu_features *cpu_features) { @@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features) /* Check if SVE is supported. 
*/ cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + cpu_features->prefer_sve_ifuncs = cpu_features->sve; + +#if __LINUX_KERNEL_VERSION < 0x060200 + if (cpu_features->sve) + cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs (); +#endif + /* Check if MOPS is supported. */ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; } commit 9883f4304cfb1558d0f1e6d9f48c4ab0a35355fe Author: H.J. Lu Date: Wed Feb 28 09:51:14 2024 -0800 x86-64: Don't use SSE resolvers for ISA level 3 or above When glibc is built with ISA level 3 or above enabled, SSE resolvers aren't available and glibc fails to build: ld: .../elf/librtld.os: in function `init_cpu_features': .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave' ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object /usr/local/bin/ld: final link failed: bad value For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor _dl_tlsdesc_dynamic_fxsave. This fixes BZ #31429. 
Reviewed-by: Noah Goldstein (cherry picked from commit befe2d3c4dec8be2cdd01a47132e47bdb7020922) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 6fe1b728c6..b8abe733ab 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1198,7 +1199,9 @@ no_cpuid: TUNABLE_CALLBACK (set_x86_shstk)); #endif +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) +#endif { if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) { @@ -1219,22 +1222,24 @@ no_cpuid: #endif } } +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL else { -#ifdef __x86_64__ +# ifdef __x86_64__ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; -# ifdef SHARED +# ifdef SHARED GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; -# endif -#else -# ifdef SHARED +# endif +# else +# ifdef SHARED if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; else GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; +# endif # endif -#endif } +#endif #ifdef SHARED # ifdef __x86_64__ diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index ea69f5223a..057a10862a 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -20,6 +20,7 @@ #include #include #include +#include #include "tlsdesc.h" #include "dl-trampoline-save.h" @@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak: .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak #ifdef SHARED -# define USE_FXSAVE -# define STATE_SAVE_ALIGNMENT 16 -# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave -# include "dl-tlsdesc-dynamic.h" -# undef _dl_tlsdesc_dynamic -# undef USE_FXSAVE +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef 
USE_FXSAVE +# endif # define USE_XSAVE # define STATE_SAVE_ALIGNMENT 64 commit 7b92f46f04c6cbce19d19ae1099628431858996c Author: Sunil K Pandey Date: Thu Feb 29 17:57:02 2024 -0800 x86-64: Simplify minimum ISA check ifdef conditional with if Replace minimum ISA check ifdef conditional with if. Since MINIMUM_X86_ISA_LEVEL and AVX_X86_ISA_LEVEL are compile time constants, compiler will perform constant folding optimization, getting same results. Reviewed-by: H.J. Lu (cherry picked from commit b6e3898194bbae78910bbe9cd086937014961e45) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index b8abe733ab..3d7c2819d7 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -1199,9 +1199,8 @@ no_cpuid: TUNABLE_CALLBACK (set_x86_shstk)); #endif -#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL - if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) -#endif + if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL + || (GLRO(dl_x86_cpu_features).xsave_state_size != 0)) { if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) { @@ -1222,24 +1221,22 @@ no_cpuid: #endif } } -#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL else { -# ifdef __x86_64__ +#ifdef __x86_64__ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; -# ifdef SHARED +# ifdef SHARED GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; -# endif -# else -# ifdef SHARED +# endif +#else +# ifdef SHARED if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; else GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; -# endif # endif - } #endif + } #ifdef SHARED # ifdef __x86_64__ commit edb9a76e3008725e9dc035d38a58e849a3bde0f1 Author: Florian Weimer Date: Sun Apr 14 08:24:51 2024 +0200 powerpc: Fix ld.so address determination for PCREL mode (bug 31640) This seems to have stopped working with some GCC 14 versions, which clobber r2. With other compilers, the kernel-provided r2 value is still available at this point. 
Reviewed-by: Peter Bergner (cherry picked from commit 14e56bd4ce15ac2d1cc43f762eb2e6b83fec1afe) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index c6682f3445..2b6f5d2b08 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr) static inline Elf64_Addr elf_machine_load_address (void) __attribute__ ((const)); +#ifndef __PCREL__ static inline Elf64_Addr elf_machine_load_address (void) { @@ -105,6 +106,24 @@ elf_machine_dynamic (void) /* Then subtract off the load address offset. */ return runtime_dynamic - elf_machine_load_address() ; } +#else /* __PCREL__ */ +/* In PCREL mode, r2 may have been clobbered. Rely on relative + relocations instead. */ + +static inline ElfW(Addr) +elf_machine_load_address (void) +{ + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; + return (ElfW(Addr)) &__ehdr_start; +} + +static inline ElfW(Addr) +elf_machine_dynamic (void) +{ + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); +} +#endif /* __PCREL__ */ /* The PLT uses Elf64_Rela relocs. */ #define elf_machine_relplt elf_machine_rela commit 04df8652eb1919da18d54b3dcd6db1675993d45d Author: H.J. Lu Date: Thu Feb 15 11:19:56 2024 -0800 Apply the Makefile sorting fix Apply the Makefile sorting fix generated by sort-makefile-lines.py. 
(cherry picked from commit ef7f4b1fef67430a8f3cfc77fa6aada2add851d7) diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index fe863e1ba4..01762ef526 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -1,52 +1,52 @@ ifeq ($(subdir),string) sysdep_routines += \ - strlen-aligned \ - strlen-lsx \ - strlen-lasx \ - strnlen-aligned \ - strnlen-lsx \ - strnlen-lasx \ + memchr-aligned \ + memchr-lasx \ + memchr-lsx \ + memcmp-aligned \ + memcmp-lasx \ + memcmp-lsx \ + memcpy-aligned \ + memcpy-unaligned \ + memmove-lasx \ + memmove-lsx \ + memmove-unaligned \ + memrchr-generic \ + memrchr-lasx \ + memrchr-lsx \ + memset-aligned \ + memset-lasx \ + memset-lsx \ + memset-unaligned \ + rawmemchr-aligned \ + rawmemchr-lasx \ + rawmemchr-lsx \ + stpcpy-aligned \ + stpcpy-lasx \ + stpcpy-lsx \ + stpcpy-unaligned \ strchr-aligned \ - strchr-lsx \ strchr-lasx \ - strrchr-aligned \ - strrchr-lsx \ - strrchr-lasx \ + strchr-lsx \ strchrnul-aligned \ - strchrnul-lsx \ strchrnul-lasx \ + strchrnul-lsx \ strcmp-aligned \ strcmp-lsx \ - strncmp-aligned \ - strncmp-lsx \ strcpy-aligned \ - strcpy-unaligned \ - strcpy-lsx \ strcpy-lasx \ - stpcpy-aligned \ - stpcpy-unaligned \ - stpcpy-lsx \ - stpcpy-lasx \ - memcpy-aligned \ - memcpy-unaligned \ - memmove-unaligned \ - memmove-lsx \ - memmove-lasx \ - rawmemchr-aligned \ - rawmemchr-lsx \ - rawmemchr-lasx \ - memchr-aligned \ - memchr-lsx \ - memchr-lasx \ - memrchr-generic \ - memrchr-lsx \ - memrchr-lasx \ - memset-aligned \ - memset-unaligned \ - memset-lsx \ - memset-lasx \ - memcmp-aligned \ - memcmp-lsx \ - memcmp-lasx \ + strcpy-lsx \ + strcpy-unaligned \ + strlen-aligned \ + strlen-lasx \ + strlen-lsx \ + strncmp-aligned \ + strncmp-lsx \ + strnlen-aligned \ + strnlen-lasx \ + strnlen-lsx \ + strrchr-aligned \ + strrchr-lasx \ + strrchr-lsx \ # sysdep_routines endif diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 
992aabe43e..5311b594af 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags) CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector) tests += \ - tst-get-cpu-features \ - tst-get-cpu-features-static \ tst-cpu-features-cpuinfo \ tst-cpu-features-cpuinfo-static \ tst-cpu-features-supports \ tst-cpu-features-supports-static \ + tst-get-cpu-features \ + tst-get-cpu-features-static \ tst-hwcap-tunables \ # tests tests-static += \ - tst-get-cpu-features-static \ tst-cpu-features-cpuinfo-static \ tst-cpu-features-supports-static \ + tst-get-cpu-features-static \ # tests-static ifeq (yes,$(have-ifunc)) ifeq (yes,$(have-gcc-ifunc)) diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 9d374a3299..0ede447405 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -252,6 +252,10 @@ sysdep-dl-routines += dl-cet tests += \ tst-cet-legacy-1 \ + tst-cet-legacy-10 \ + tst-cet-legacy-10-static \ + tst-cet-legacy-10a \ + tst-cet-legacy-10a-static \ tst-cet-legacy-1a \ tst-cet-legacy-2 \ tst-cet-legacy-2a \ @@ -263,15 +267,11 @@ tests += \ tst-cet-legacy-8 \ tst-cet-legacy-9 \ tst-cet-legacy-9-static \ - tst-cet-legacy-10 \ - tst-cet-legacy-10-static \ - tst-cet-legacy-10a \ - tst-cet-legacy-10a-static \ # tests tests-static += \ - tst-cet-legacy-9-static \ tst-cet-legacy-10-static \ tst-cet-legacy-10a-static \ + tst-cet-legacy-9-static \ # tests-static tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd) diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index ea81753b70..e1a490dd98 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -4,10 +4,10 @@ libm-sysdep_routines += \ s_ceilf-c \ s_floor-c \ s_floorf-c \ - s_rint-c \ - s_rintf-c \ s_nearbyint-c \ s_nearbyintf-c \ + s_rint-c \ + s_rintf-c \ s_roundeven-c \ s_roundevenf-c \ s_trunc-c \ @@ -21,10 +21,10 @@ libm-sysdep_routines += \ 
s_floorf-sse4_1 \ s_nearbyint-sse4_1 \ s_nearbyintf-sse4_1 \ - s_roundeven-sse4_1 \ - s_roundevenf-sse4_1 \ s_rint-sse4_1 \ s_rintf-sse4_1 \ + s_roundeven-sse4_1 \ + s_roundevenf-sse4_1 \ s_trunc-sse4_1 \ s_truncf-sse4_1 \ # libm-sysdep_routines @@ -84,12 +84,12 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2 CFLAGS-s_sincosf-fma.c = -mfma -mavx2 libm-sysdep_routines += \ + e_asin-fma4 \ + e_atan2-fma4 \ e_exp-fma4 \ e_log-fma4 \ e_pow-fma4 \ - e_asin-fma4 \ s_atan-fma4 \ - e_atan2-fma4 \ s_sin-fma4 \ s_sincos-fma4 \ s_tan-fma4 \ @@ -106,10 +106,10 @@ CFLAGS-s_tan-fma4.c = -mfma4 CFLAGS-s_sincos-fma4.c = -mfma4 libm-sysdep_routines += \ + e_atan2-avx \ e_exp-avx \ e_log-avx \ s_atan-avx \ - e_atan2-avx \ s_sin-avx \ s_sincos-avx \ s_tan-avx \ diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index e1e894c963..d3d2270394 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -4,8 +4,8 @@ sysdep_routines += \ memchr-avx2 \ memchr-avx2-rtm \ memchr-evex \ - memchr-evex512 \ memchr-evex-rtm \ + memchr-evex512 \ memchr-sse2 \ memcmp-avx2-movbe \ memcmp-avx2-movbe-rtm \ @@ -37,8 +37,8 @@ sysdep_routines += \ rawmemchr-avx2 \ rawmemchr-avx2-rtm \ rawmemchr-evex \ - rawmemchr-evex512 \ rawmemchr-evex-rtm \ + rawmemchr-evex512 \ rawmemchr-sse2 \ stpcpy-avx2 \ stpcpy-avx2-rtm \ commit 423099a03264ea28298f47355d7811b8efe03c97 Author: Sunil K Pandey Date: Tue Feb 13 12:23:14 2024 -0800 x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch When glibc is built with ISA level 3 or higher by default, the resulting glibc binaries won't run on SSE or FMA4 processors. Exclude SSE, AVX and FMA4 variants in libm multiarch when ISA level 3 or higher is enabled by default. When glibc is built with ISA level 2 enabled by default, only keep SSE4.1 variant. Fixes BZ 31335. 
NB: elf/tst-valgrind-smoke test fails with ISA level 4, because valgrind doesn't support AVX512 instructions: https://bugs.kde.org/show_bug.cgi?id=383010 Reviewed-by: H.J. Lu (cherry picked from commit 9f78a7c1d0963282608da836b840f0d5ae1c478e) diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure index 1f4c2d67fd..2a5421bb31 100644 --- a/sysdeps/x86/configure +++ b/sysdeps/x86/configure @@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; } if test $libc_cv_have_x86_lahf_sahf = yes; then printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h + ISAFLAG="-DHAVE_X86_LAHF_SAHF" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5 printf %s "checking for MOVBE instruction support... " >&6; } @@ -120,9 +121,41 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; } if test $libc_cv_have_x86_movbe = yes; then printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h + ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" fi + + # Check for ISA level support. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5 +printf %s "checking for ISA level support... 
" >&6; } +if test ${libc_cv_have_x86_isa_level+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat > conftest.c < +#if MINIMUM_X86_ISA_LEVEL >= 4 +libc_cv_have_x86_isa_level=4 +#elif MINIMUM_X86_ISA_LEVEL == 3 +libc_cv_have_x86_isa_level=3 +#elif MINIMUM_X86_ISA_LEVEL == 2 +libc_cv_have_x86_isa_level=2 +#else +libc_cv_have_x86_isa_level=baseline +#endif +EOF + eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` + rm -rf conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5 +printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; } +else + libc_cv_have_x86_isa_level=baseline fi config_vars="$config_vars +have-x86-isa-level = $libc_cv_have_x86_isa_level" +config_vars="$config_vars +x86-isa-level-3-or-above = 3 4" +config_vars="$config_vars enable-x86-isa-level = $libc_cv_include_x86_isa_level" printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac index 437a50623b..78ff7c8f41 100644 --- a/sysdeps/x86/configure.ac +++ b/sysdeps/x86/configure.ac @@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then fi]) if test $libc_cv_have_x86_lahf_sahf = yes; then AC_DEFINE(HAVE_X86_LAHF_SAHF) + ISAFLAG="-DHAVE_X86_LAHF_SAHF" fi AC_CACHE_CHECK([for MOVBE instruction support], libc_cv_have_x86_movbe, [dnl @@ -81,8 +82,31 @@ if test $libc_cv_include_x86_isa_level = yes; then fi]) if test $libc_cv_have_x86_movbe = yes; then AC_DEFINE(HAVE_X86_MOVBE) + ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" fi + + # Check for ISA level support. 
+ AC_CACHE_CHECK([for ISA level support], + libc_cv_have_x86_isa_level, [dnl +cat > conftest.c < +#if MINIMUM_X86_ISA_LEVEL >= 4 +libc_cv_have_x86_isa_level=4 +#elif MINIMUM_X86_ISA_LEVEL == 3 +libc_cv_have_x86_isa_level=3 +#elif MINIMUM_X86_ISA_LEVEL == 2 +libc_cv_have_x86_isa_level=2 +#else +libc_cv_have_x86_isa_level=baseline +#endif +EOF + eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` + rm -rf conftest*]) +else + libc_cv_have_x86_isa_level=baseline fi +LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) +LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) dnl Static PIE is supported. diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index e1a490dd98..6ddd50240c 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,49 +1,4 @@ ifeq ($(subdir),math) -libm-sysdep_routines += \ - s_ceil-c \ - s_ceilf-c \ - s_floor-c \ - s_floorf-c \ - s_nearbyint-c \ - s_nearbyintf-c \ - s_rint-c \ - s_rintf-c \ - s_roundeven-c \ - s_roundevenf-c \ - s_trunc-c \ - s_truncf-c \ -# libm-sysdep_routines - -libm-sysdep_routines += \ - s_ceil-sse4_1 \ - s_ceilf-sse4_1 \ - s_floor-sse4_1 \ - s_floorf-sse4_1 \ - s_nearbyint-sse4_1 \ - s_nearbyintf-sse4_1 \ - s_rint-sse4_1 \ - s_rintf-sse4_1 \ - s_roundeven-sse4_1 \ - s_roundevenf-sse4_1 \ - s_trunc-sse4_1 \ - s_truncf-sse4_1 \ -# libm-sysdep_routines - -libm-sysdep_routines += \ - e_asin-fma \ - e_atan2-fma \ - e_exp-fma \ - e_log-fma \ - e_log2-fma \ - e_pow-fma \ - s_atan-fma \ - s_expm1-fma \ - s_log1p-fma \ - s_sin-fma \ - s_sincos-fma \ - s_tan-fma \ -# libm-sysdep_routines - CFLAGS-e_asin-fma.c = -mfma -mavx2 CFLAGS-e_atan2-fma.c = -mfma -mavx2 CFLAGS-e_exp-fma.c = -mfma -mavx2 @@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 
-libm-sysdep_routines += \ - s_cosf-sse2 \ - s_sincosf-sse2 \ - s_sinf-sse2 \ -# libm-sysdep_routines - -libm-sysdep_routines += \ - e_exp2f-fma \ - e_expf-fma \ - e_log2f-fma \ - e_logf-fma \ - e_powf-fma \ - s_cosf-fma \ - s_sincosf-fma \ - s_sinf-fma \ -# libm-sysdep_routines - CFLAGS-e_exp2f-fma.c = -mfma -mavx2 CFLAGS-e_expf-fma.c = -mfma -mavx2 CFLAGS-e_log2f-fma.c = -mfma -mavx2 @@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2 CFLAGS-s_cosf-fma.c = -mfma -mavx2 CFLAGS-s_sincosf-fma.c = -mfma -mavx2 +# Check if ISA level is 3 or above. +ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above))) libm-sysdep_routines += \ + s_ceil-avx \ + s_ceilf-avx \ + s_floor-avx \ + s_floorf-avx \ + s_nearbyint-avx \ + s_nearbyintf-avx \ + s_rint-avx \ + s_rintf-avx \ + s_roundeven-avx \ + s_roundevenf-avx \ + s_trunc-avx \ + s_truncf-avx \ +# libm-sysdep_routines +else +libm-sysdep_routines += \ + e_asin-fma \ e_asin-fma4 \ + e_atan2-avx \ + e_atan2-fma \ e_atan2-fma4 \ + e_exp-avx \ + e_exp-fma \ e_exp-fma4 \ + e_exp2f-fma \ + e_expf-fma \ + e_log-avx \ + e_log-fma \ e_log-fma4 \ + e_log2-fma \ + e_log2f-fma \ + e_logf-fma \ + e_pow-fma \ e_pow-fma4 \ + e_powf-fma \ + s_atan-avx \ + s_atan-fma \ s_atan-fma4 \ + s_ceil-sse4_1 \ + s_ceilf-sse4_1 \ + s_cosf-fma \ + s_cosf-sse2 \ + s_expm1-fma \ + s_floor-sse4_1 \ + s_floorf-sse4_1 \ + s_log1p-fma \ + s_nearbyint-sse4_1 \ + s_nearbyintf-sse4_1 \ + s_rint-sse4_1 \ + s_rintf-sse4_1 \ + s_roundeven-sse4_1 \ + s_roundevenf-sse4_1 \ + s_sin-avx \ + s_sin-fma \ s_sin-fma4 \ + s_sincos-avx \ + s_sincos-fma \ s_sincos-fma4 \ + s_sincosf-fma \ + s_sincosf-sse2 \ + s_sinf-fma \ + s_sinf-sse2 \ + s_tan-avx \ + s_tan-fma \ s_tan-fma4 \ + s_trunc-sse4_1 \ + s_truncf-sse4_1 \ # libm-sysdep_routines +ifeq ($(have-x86-isa-level),baseline) +libm-sysdep_routines += \ + s_ceil-c \ + s_ceilf-c \ + s_floor-c \ + s_floorf-c \ + s_nearbyint-c \ + s_nearbyintf-c \ + s_rint-c \ + s_rintf-c \ + s_roundeven-c \ + s_roundevenf-c \ + 
s_trunc-c \ + s_truncf-c \ +# libm-sysdep_routines +endif +endif CFLAGS-e_asin-fma4.c = -mfma4 CFLAGS-e_atan2-fma4.c = -mfma4 @@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 CFLAGS-s_sincos-fma4.c = -mfma4 -libm-sysdep_routines += \ - e_atan2-avx \ - e_exp-avx \ - e_log-avx \ - s_atan-avx \ - s_sin-avx \ - s_sincos-avx \ - s_tan-avx \ -# libm-sysdep_routines - CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c index 2eaa6c2c04..d64fca2586 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -16,26 +16,29 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_ieee754_asin (double); extern double __redirect_ieee754_acos (double); -#define SYMBOL_NAME ieee754_asin -#include "ifunc-fma4.h" +# define SYMBOL_NAME ieee754_asin +# include "ifunc-fma4.h" libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_asin, __asin) -#undef SYMBOL_NAME -#define SYMBOL_NAME ieee754_acos -#include "ifunc-fma4.h" +# undef SYMBOL_NAME +# define SYMBOL_NAME ieee754_acos +# include "ifunc-fma4.h" libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_acos, __acos) -#define __ieee754_acos __ieee754_acos_sse2 -#define __ieee754_asin __ieee754_asin_sse2 +# define __ieee754_acos __ieee754_acos_sse2 +# define __ieee754_asin __ieee754_asin_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 17ee4f3c36..8a86c14ded 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -16,16 +16,19 @@ License along with the GNU C Library; if not, 
see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_ieee754_atan2 (double, double); -#define SYMBOL_NAME ieee754_atan2 -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME ieee754_atan2 +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_ieee754_atan2, __ieee754_atan2, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_atan2, __atan2) -#define __ieee754_atan2 __ieee754_atan2_sse2 +# define __ieee754_atan2 __ieee754_atan2_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index 406b7ebd44..d56329291a 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . */ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern double __redirect_ieee754_exp (double); -#define SYMBOL_NAME ieee754_exp -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME ieee754_exp +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_exp, __exp) -#define __exp __ieee754_exp_sse2 +# define __exp __ieee754_exp_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c index 804fd6be85..06fe5028d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c @@ -16,25 +16,28 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern float __redirect_exp2f (float); -#define SYMBOL_NAME exp2f -#include "ifunc-fma.h" +# define SYMBOL_NAME exp2f +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27); libm_alias_float_other (__exp2, exp2) -#else +# else libm_alias_float (__exp2, exp2) -#endif +# endif strong_alias (__exp2f, __ieee754_exp2f) libm_alias_finite (__exp2f, __exp2f) -#define __exp2f __exp2f_sse2 +# define __exp2f __exp2f_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c index 4a7e2a5bce..19d767f636 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_expf.c +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c @@ -16,28 +16,31 @@ License along with the GNU C Library; if not, see . */ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern float __redirect_expf (float); -#define SYMBOL_NAME expf -#include "ifunc-fma.h" +# define SYMBOL_NAME expf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED __hidden_ver1 (__expf, __GI___expf, __redirect_expf) __attribute__ ((visibility ("hidden"))); versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27); libm_alias_float_other (__exp, exp) -#else +# else libm_alias_float (__exp, exp) -#endif +# endif strong_alias (__expf, __ieee754_expf) libm_alias_finite (__expf, __expf) -#define __expf __expf_sse2 +# define __expf __expf_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index 067fbf58c3..d80c1b1463 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern double __redirect_ieee754_log (double); -#define SYMBOL_NAME ieee754_log -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME ieee754_log +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_log, __log) -#define __log __ieee754_log_sse2 +# define __log __ieee754_log_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c index 9c57a2f6cc..9686782c09 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c @@ -16,28 +16,31 @@ License along with the GNU C Library; if not, see . */ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern double __redirect_log2 (double); -#define SYMBOL_NAME log2 -#include "ifunc-fma.h" +# define SYMBOL_NAME log2 +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED __hidden_ver1 (__log2, __GI___log2, __redirect_log2) __attribute__ ((visibility ("hidden"))); versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29); libm_alias_double_other (__log2, log2) -#else +# else libm_alias_double (__log2, log2) -#endif +# endif strong_alias (__log2, __ieee754_log2) libm_alias_finite (__log2, __log2) -#define __log2 __log2_sse2 +# define __log2 __log2_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c index 2b45c87f38..8ada46e11e 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c @@ -16,28 +16,31 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern float __redirect_log2f (float); -#define SYMBOL_NAME log2f -#include "ifunc-fma.h" +# define SYMBOL_NAME log2f +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f) __attribute__ ((visibility ("hidden"))); versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27); libm_alias_float_other (__log2, log2) -#else +# else libm_alias_float (__log2, log2) -#endif +# endif strong_alias (__log2f, __ieee754_log2f) libm_alias_finite (__log2f, __log2f) -#define __log2f __log2f_sse2 +# define __log2f __log2f_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c index 97e23c8fea..a3978d9a8e 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_logf.c +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c @@ -16,28 +16,31 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern float __redirect_logf (float); -#define SYMBOL_NAME logf -#include "ifunc-fma.h" +# define SYMBOL_NAME logf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED __hidden_ver1 (__logf, __GI___logf, __redirect_logf) __attribute__ ((visibility ("hidden"))); versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27); libm_alias_float_other (__log, log) -#else +# else libm_alias_float (__log, log) -#endif +# endif strong_alias (__logf, __ieee754_logf) libm_alias_finite (__logf, __logf) -#define __logf __logf_sse2 +# define __logf __logf_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c index 42618e7112..f8f17aff9f 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . */ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include extern double __redirect_ieee754_pow (double, double); -#define SYMBOL_NAME ieee754_pow -#include "ifunc-fma4.h" +# define SYMBOL_NAME ieee754_pow +# include "ifunc-fma4.h" libc_ifunc_redirected (__redirect_ieee754_pow, __ieee754_pow, IFUNC_SELECTOR ()); libm_alias_finite (__ieee754_pow, __pow) -#define __pow __ieee754_pow_sse2 +# define __pow __ieee754_pow_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c index 8e6ce13cc1..8b1a4c7d04 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_powf.c +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c @@ -16,31 +16,34 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include +# include -#define powf __redirect_powf -#define __DECL_SIMD___redirect_powf -#include -#undef powf +# define powf __redirect_powf +# define __DECL_SIMD___redirect_powf +# include +# undef powf -#define SYMBOL_NAME powf -#include "ifunc-fma.h" +# define SYMBOL_NAME powf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); -#ifdef SHARED +# ifdef SHARED __hidden_ver1 (__powf, __GI___powf, __redirect_powf) __attribute__ ((visibility ("hidden"))); versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27); libm_alias_float_other (__pow, pow) -#else +# else libm_alias_float (__pow, pow) -#endif +# endif strong_alias (__powf, __ieee754_powf) libm_alias_finite (__powf, __powf) -#define __powf __powf_sse2 +# define __powf __powf_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index 71bad096a9..4d2c6ce006 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -16,15 +16,18 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_atan (double); -#define SYMBOL_NAME atan -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME atan +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ()); libm_alias_double (__atan, atan) -#define __atan __atan_sse2 +# define __atan __atan_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S new file mode 100644 index 0000000000..e6c1106753 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of ceil function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <libm-alias-double.h> + + .text +ENTRY(__ceil) + vroundsd $10, %xmm0, %xmm0, %xmm0 + ret +END(__ceil) + +libm_alias_double (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S index 64119011ad..dba756c38f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S @@ -17,8 +17,20 @@ #include <sysdep.h> +#include <isa-level.h> +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include <libm-alias-double.h> +# define __ceil_sse41 __ceil + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__ceil_sse41) roundsd $10, %xmm0, %xmm0 ret END(__ceil_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__ceil, ceil) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c index cc028addee..46c8e91e19 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define NO_MATH_REDIRECT -#include <libm-alias-double.h> +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include <libm-alias-double.h> -#define ceil __redirect_ceil -#define __ceil __redirect___ceil -#include <math.h> -#undef ceil -#undef __ceil +# define ceil __redirect_ceil +# define __ceil __redirect___ceil +# include <math.h> +# undef ceil +# undef __ceil -#define SYMBOL_NAME ceil -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME ceil +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ()); libm_alias_double (__ceil, ceil) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S new file mode 100644 index 0000000000..b4d8ac0455 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of ceilf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <libm-alias-float.h> + + .text +ENTRY(__ceilf) + vroundss $10, %xmm0, %xmm0, %xmm0 + ret +END(__ceilf) + +libm_alias_float (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S index dd9a9f6b71..9abc87b91a 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S @@ -17,8 +17,20 @@ #include <sysdep.h> +#include <isa-level.h> +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include <libm-alias-float.h> +# define __ceilf_sse41 __ceilf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__ceilf_sse41) roundss $10, %xmm0, %xmm0 ret END(__ceilf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__ceil, ceil) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c index 97a0ca7d19..bb53108f73 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define NO_MATH_REDIRECT -#include <libm-alias-float.h> +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include <libm-alias-float.h> -#define ceilf __redirect_ceilf -#define __ceilf __redirect___ceilf -#include <math.h> -#undef ceilf -#undef __ceilf +# define ceilf __redirect_ceilf +# define __ceilf __redirect___ceilf +# include <math.h> +# undef ceilf +# undef __ceilf -#define SYMBOL_NAME ceilf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME ceilf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ()); libm_alias_float (__ceil, ceil) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c index 2703c576df..8a02e04538 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c @@ -16,13 +16,18 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern float __redirect_cosf (float); -#define SYMBOL_NAME cosf -#include "ifunc-fma.h" +# define SYMBOL_NAME cosf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ()); libm_alias_float (__cos, cos) +#else +# include +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c index 8a2d69f9b2..d58ef3d8f5 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c +++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c @@ -16,21 +16,24 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_expm1 (double); -#define SYMBOL_NAME expm1 -#include "ifunc-fma.h" +# define SYMBOL_NAME expm1 +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ()); libm_alias_double (__expm1, expm1) -#define __expm1 __expm1_sse2 +# define __expm1 __expm1_sse2 /* NB: __expm1 may be expanded to __expm1_sse2 in the following prototypes. */ extern long double __expm1l (long double); extern long double __expm1f128 (long double); +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S new file mode 100644 index 0000000000..ff74b5a8bf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of floor function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <libm-alias-double.h> + + .text +ENTRY(__floor) + vroundsd $9, %xmm0, %xmm0, %xmm0 + ret +END(__floor) + +libm_alias_double (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S index 2f7521f39f..c9b9b0639b 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S @@ -17,8 +17,20 @@ #include <sysdep.h> +#include <isa-level.h> +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include <libm-alias-double.h> +# define __floor_sse41 __floor + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__floor_sse41) roundsd $9, %xmm0, %xmm0 ret END(__floor_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__floor, floor) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c index 8cebd48e10..2c87dd0056 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor.c +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define floor __redirect_floor -#define __floor __redirect___floor -#include -#undef floor -#undef __floor +# define floor __redirect_floor +# define __floor __redirect___floor +# include +# undef floor +# undef __floor -#define SYMBOL_NAME floor -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME floor +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ()); libm_alias_double (__floor, floor) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S new file mode 100644 index 0000000000..c378baae8e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of floorf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__floorf) + vroundss $9, %xmm0, %xmm0, %xmm0 + ret +END(__floorf) + +libm_alias_float (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S index 5f6020d27d..c2216899db 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __floorf_sse41 __floorf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__floorf_sse41) roundss $9, %xmm0, %xmm0 ret END(__floorf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__floor, floor) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c index a14e18b03c..a277802b6d 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . */ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define floorf __redirect_floorf -#define __floorf __redirect___floorf -#include -#undef floorf -#undef __floorf +# define floorf __redirect_floorf +# define __floorf __redirect___floorf +# include +# undef floorf +# undef __floorf -#define SYMBOL_NAME floorf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME floorf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); libm_alias_float (__floor, floor) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c index a8e1a3f21b..3fa1185d81 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c +++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c @@ -16,14 +16,17 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_log1p (double); -#define SYMBOL_NAME log1p -#include "ifunc-fma.h" +# define SYMBOL_NAME log1p +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ()); -#define __log1p __log1p_sse2 +# define __log1p __log1p_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S new file mode 100644 index 0000000000..5bfdf73c28 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of nearbyint function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__nearbyint) + vroundsd $0xc, %xmm0, %xmm0, %xmm0 + ret +END(__nearbyint) + +libm_alias_double (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S index 674f7eb40a..9d84410a1f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __nearbyint_sse41 __nearbyint + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__nearbyint_sse41) roundsd $0xc, %xmm0, %xmm0 ret END(__nearbyint_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c index 693e42dd4e..057a7ca60f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# include -#define nearbyint __redirect_nearbyint -#define __nearbyint __redirect___nearbyint -#include -#undef nearbyint -#undef __nearbyint +# define nearbyint __redirect_nearbyint +# define __nearbyint __redirect___nearbyint +# include +# undef nearbyint +# undef __nearbyint -#define SYMBOL_NAME nearbyint -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME nearbyint +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, IFUNC_SELECTOR ()); libm_alias_double (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S new file mode 100644 index 0000000000..1dbaed0324 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of nearbyintf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__nearbyintf) + vroundss $0xc, %xmm0, %xmm0, %xmm0 + ret +END(__nearbyintf) + +libm_alias_float (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S index 5892bd7563..3cf35f92d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __nearbyintf_sse41 __nearbyintf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__nearbyintf_sse41) roundss $0xc, %xmm0, %xmm0 ret END(__nearbyintf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c index a0ac009f4b..41f374ba72 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# include -#define nearbyintf __redirect_nearbyintf -#define __nearbyintf __redirect___nearbyintf -#include -#undef nearbyintf -#undef __nearbyintf +# define nearbyintf __redirect_nearbyintf +# define __nearbyintf __redirect___nearbyintf +# include +# undef nearbyintf +# undef __nearbyintf -#define SYMBOL_NAME nearbyintf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME nearbyintf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf, IFUNC_SELECTOR ()); libm_alias_float (__nearbyint, nearbyint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S new file mode 100644 index 0000000000..2b403b331f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of rint function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__rint) + vroundsd $4, %xmm0, %xmm0, %xmm0 + ret +END(__rint) + +libm_alias_double (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S index 405372991b..8cd9cf759f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __rint_sse41 __rint + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__rint_sse41) roundsd $4, %xmm0, %xmm0 ret END(__rint_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__rint, rint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c index 754c87e004..18623b7d99 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint.c +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . */ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define rint __redirect_rint -#define __rint __redirect___rint -#include -#undef rint -#undef __rint +# define rint __redirect_rint +# define __rint __redirect___rint +# include +# undef rint +# undef __rint -#define SYMBOL_NAME rint -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME rint +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); libm_alias_double (__rint, rint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S new file mode 100644 index 0000000000..171c2867f4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of rintf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + + .text +ENTRY(__rintf) + vroundss $4, %xmm0, %xmm0, %xmm0 + ret +END(__rintf) + +libm_alias_float (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S index 8ac67ce767..fc1e70f0c9 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __rintf_sse41 __rintf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__rintf_sse41) roundss $4, %xmm0, %xmm0 ret END(__rintf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__rint, rint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c index e9d6b7a5f2..e275368dec 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define rintf __redirect_rintf -#define __rintf __redirect___rintf -#include -#undef rintf -#undef __rintf +# define rintf __redirect_rintf +# define __rintf __redirect___rintf +# include +# undef rintf +# undef __rintf -#define SYMBOL_NAME rintf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME rintf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); libm_alias_float (__rint, rint) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S new file mode 100644 index 0000000000..576790355c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of roundeven function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__roundeven) + vroundsd $8, %xmm0, %xmm0, %xmm0 + ret +END(__roundeven) + +libm_alias_double (__roundeven, roundeven) diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S index 5ef102336b..f00be56c59 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __roundeven_sse41 __roundeven + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__roundeven_sse41) roundsd $8, %xmm0, %xmm0 ret END(__roundeven_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__roundeven, roundeven) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c index 8737b32e26..139aad088f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c @@ -16,16 +16,19 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# include -#define roundeven __redirect_roundeven -#define __roundeven __redirect___roundeven -#include -#undef roundeven -#undef __roundeven +# define roundeven __redirect_roundeven +# define __roundeven __redirect___roundeven +# include +# undef roundeven +# undef __roundeven -#define SYMBOL_NAME roundeven -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME roundeven +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ()); libm_alias_double (__roundeven, roundeven) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S new file mode 100644 index 0000000000..42c359f4cd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of roundevenf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__roundevenf) + vroundss $8, %xmm0, %xmm0, %xmm0 + ret +END(__roundevenf) + +libm_alias_float (__roundeven, roundeven) diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S index 792c90ba07..6b148e4353 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S @@ -17,8 +17,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __roundevenf_sse41 __roundevenf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__roundevenf_sse41) roundss $8, %xmm0, %xmm0 ret END(__roundevenf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__roundeven, roundeven) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c index e96016a4d5..2fb090075d 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c @@ -16,16 +16,19 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# include -#define roundevenf __redirect_roundevenf -#define __roundevenf __redirect___roundevenf -#include -#undef roundevenf -#undef __roundevenf +# define roundevenf __redirect_roundevenf +# define __roundevenf __redirect___roundevenf +# include +# undef roundevenf +# undef __roundevenf -#define SYMBOL_NAME roundevenf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME roundevenf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ()); libm_alias_float (__roundeven, roundeven) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index 355cc0092e..21e77943a3 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -16,24 +16,27 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_sin (double); extern double __redirect_cos (double); -#define SYMBOL_NAME sin -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME sin +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); libm_alias_double (__sin, sin) -#undef SYMBOL_NAME -#define SYMBOL_NAME cos -#include "ifunc-avx-fma4.h" +# undef SYMBOL_NAME +# define SYMBOL_NAME cos +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); libm_alias_double (__cos, cos) -#define __cos __cos_sse2 -#define __sin __sin_sse2 +# define __cos __cos_sse2 +# define __sin __sin_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c index 70107e999c..b35757f8de 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c @@ -16,15 +16,18 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern void __redirect_sincos (double, double *, double *); -#define SYMBOL_NAME sincos -#include "ifunc-fma4.h" +# define SYMBOL_NAME sincos +# include "ifunc-fma4.h" libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ()); libm_alias_double (__sincos, sincos) -#define __sincos __sincos_sse2 +# define __sincos __sincos_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c index 80bc028451..0ea9b40e84 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c @@ -16,13 +16,18 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern void __redirect_sincosf (float, float *, float *); -#define SYMBOL_NAME sincosf -#include "ifunc-fma.h" +# define SYMBOL_NAME sincosf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ()); libm_alias_float (__sincos, sincos) +#else +# include +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c index a32b9e9550..c61624e3ee 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c @@ -16,13 +16,18 @@ License along with the GNU C Library; if not, see . 
*/ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern float __redirect_sinf (float); -#define SYMBOL_NAME sinf -#include "ifunc-fma.h" +# define SYMBOL_NAME sinf +# include "ifunc-fma.h" libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); libm_alias_float (__sin, sin) +#else +# include +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index f9a2474a13..125d992ba1 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -16,15 +16,18 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include extern double __redirect_tan (double); -#define SYMBOL_NAME tan -#include "ifunc-avx-fma4.h" +# define SYMBOL_NAME tan +# include "ifunc-avx-fma4.h" libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ()); libm_alias_double (__tan, tan) -#define __tan __tan_sse2 +# define __tan __tan_sse2 +#endif #include diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S new file mode 100644 index 0000000000..b3e87e9606 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of trunc function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + + .text +ENTRY(__trunc) + vroundsd $11, %xmm0, %xmm0, %xmm0 + ret +END(__trunc) + +libm_alias_double (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S index b496a6ef49..2b79174eed 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S @@ -18,8 +18,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __trunc_sse41 __trunc + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__trunc_sse41) roundsd $11, %xmm0, %xmm0 ret END(__trunc_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_double (__trunc, trunc) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c index 9bc9df8744..ea89c4f85d 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define trunc __redirect_trunc -#define __trunc __redirect___trunc -#include -#undef trunc -#undef __trunc +# define trunc __redirect_trunc +# define __trunc __redirect___trunc +# include +# undef trunc +# undef __trunc -#define SYMBOL_NAME trunc -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME trunc +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); libm_alias_double (__trunc, trunc) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S new file mode 100644 index 0000000000..f31ac7d7f7 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S @@ -0,0 +1,28 @@ +/* AVX implementation of truncf function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + + .text +ENTRY(__truncf) + vroundss $11, %xmm0, %xmm0, %xmm0 + ret +END(__truncf) + +libm_alias_float (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S index 22e9a83307..60498b2cb2 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S @@ -18,8 +18,20 @@ #include +#include +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# include +# define __truncf_sse41 __truncf + .text +#else .section .text.sse4.1,"ax",@progbits +#endif + ENTRY(__truncf_sse41) roundss $11, %xmm0, %xmm0 ret END(__truncf_sse41) + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +libm_alias_float (__trunc, trunc) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c index dae01d166a..92435ce39d 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c @@ -16,17 +16,20 @@ License along with the GNU C Library; if not, see . 
*/ -#define NO_MATH_REDIRECT -#include +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include -#define truncf __redirect_truncf -#define __truncf __redirect___truncf -#include -#undef truncf -#undef __truncf +# define truncf __redirect_truncf +# define __truncf __redirect___truncf +# include +# undef truncf +# undef __truncf -#define SYMBOL_NAME truncf -#include "ifunc-sse4_1.h" +# define SYMBOL_NAME truncf +# include "ifunc-sse4_1.h" libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); libm_alias_float (__trunc, trunc) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c index 27eee98a0a..3584187e0e 100644 --- a/sysdeps/x86_64/fpu/multiarch/w_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c @@ -1 +1,6 @@ -#include +#include +#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL +# include +#else +# include +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c index 9b2b018711..414ca3ca3d 100644 --- a/sysdeps/x86_64/fpu/multiarch/w_log.c +++ b/sysdeps/x86_64/fpu/multiarch/w_log.c @@ -1 +1,6 @@ -#include +#include +#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL +# include +#else +# include +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c index b50c1988de..d5fcc4f871 100644 --- a/sysdeps/x86_64/fpu/multiarch/w_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c @@ -1 +1,6 @@ -#include +#include +#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL +# include +#else +# include +#endif commit 31da30f23cddd36db29d5b6a1c7619361b271fb4 Author: Charles Fol Date: Thu Mar 28 12:25:38 2024 -0300 iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961) ISO-2022-CN-EXT uses escape sequences to indicate character set changes (as specified by RFC 1922). 
While the SOdesignation has the expected bounds checks, neither SS2designation nor SS3designation has them, allowing a write overflow of 1, 2, or 3 bytes with fixed values: '$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'. Checked on aarch64-linux-gnu. Co-authored-by: Adhemerval Zanella Reviewed-by: Carlos O'Donell Tested-by: Carlos O'Donell (cherry picked from commit f9dc609e06b1136bb0408be9605ce7973a767ada) diff --git a/iconvdata/Makefile b/iconvdata/Makefile index ea019ce5c0..7196a8744b 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ - bug-iconv13 bug-iconv14 bug-iconv15 + bug-iconv13 bug-iconv14 bug-iconv15 \ + tst-iconv-iso-2022-cn-ext ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \ $(addprefix $(objpfx),$(modules.so)) $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ $(addprefix $(objpfx),$(modules.so)) +$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) $(objpfx)iconv-test.out: run-iconv-test.sh \ $(addprefix $(objpfx), $(gconv-modules)) \ diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c index b34c8a36f4..cce29b1969 100644 --- a/iconvdata/iso-2022-cn-ext.c +++ b/iconvdata/iso-2022-cn-ext.c @@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); { \ const char *escseq; \ \ + if (outptr + 4 > outend) \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ assert (used == CNS11643_2_set); /* XXX */ \ escseq = "*H"; \ *outptr++ = ESC; \ @@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); { \ const char *escseq; \ \ + if (outptr + 4 > 
outend) \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ assert ((used >> 5) >= 3 && (used >> 5) <= 7); \ escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \ *outptr++ = ESC; \ diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c new file mode 100644 index 0000000000..96a8765fd5 --- /dev/null +++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c @@ -0,0 +1,128 @@ +/* Verify ISO-2022-CN-EXT does not write out of the bounds. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* The test sets up a two memory page buffer with the second page marked + PROT_NONE to trigger a fault if the conversion writes beyond the exact + expected amount. Then we carry out various conversions and precisely + place the start of the output buffer in order to trigger a SIGSEGV if the + process writes anywhere between 1 and page sized bytes more (only one + PROT_NONE page is setup as a canary) than expected. These tests exercise + all three of the cases in ISO-2022-CN-EXT where the converter must switch + character sets and may run out of buffer space while doing the + operation. 
*/ + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + char *ntf; + size_t ntfsize; + char *outbufbase; + { + int pgz = getpagesize (); + TEST_VERIFY_EXIT (pgz > 0); + ntfsize = 2 * pgz; + + ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE + | MAP_ANONYMOUS, -1); + xmprotect (ntf + pgz, pgz, PROT_NONE); + + outbufbase = ntf + pgz; + } + + /* Check if SOdesignation escape sequence does not trigger an OOB write. */ + { + char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2"; + + for (int i = 0; i < 9; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + /* Same as before for SS2designation. */ + { + char inbuf[] = "㴽 \xe3\xb4\xbd"; + + for (int i = 0; i < 14; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + /* Same as before for SS3designation. 
*/ + { + char inbuf[] = "劄 \xe5\x8a\x84"; + + for (int i = 0; i < 14; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + xmunmap (ntf, ntfsize); + + return 0; +} + +#include commit e828914cf9f2fc2caa5bced0fc6a03cb78324979 Author: Florian Weimer Date: Tue Apr 23 21:16:32 2024 +0200 nptl: Fix tst-cancel30 on kernels without ppoll_time64 support Fall back to ppoll if ppoll_time64 fails with ENOSYS. Fixes commit 370da8a121c3ba9eeb2f13da15fc0f21f4136b25 ("nptl: Fix tst-cancel30 on sparc64"). Reviewed-by: Adhemerval Zanella (cherry picked from commit f4724843ada64a51d66f65d3199fe431f9d4c254) diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c index 3030660e5f..94ad6281bc 100644 --- a/sysdeps/pthread/tst-cancel30.c +++ b/sysdeps/pthread/tst-cancel30.c @@ -18,6 +18,7 @@ License along with the GNU C Library; if not, see . */ +#include #include #include #include @@ -46,13 +47,19 @@ tf (void *arg) /* Wait indefinitely for cancellation, which only works if asynchronous cancellation is enabled. 
*/ -#if defined SYS_ppoll || defined SYS_ppoll_time64 -# ifndef SYS_ppoll_time64 -# define SYS_ppoll_time64 SYS_ppoll +#ifdef SYS_ppoll_time64 + long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); + (void) ret; +# ifdef SYS_ppoll + if (ret == -1 && errno == ENOSYS) + syscall (SYS_ppoll, NULL, 0, NULL, NULL); # endif - syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); #else +# ifdef SYS_ppoll + syscall (SYS_ppoll, NULL, 0, NULL, NULL); +# else for (;;); +# endif #endif return 0; commit e701c7d761f6e5c48d8e9dd5da88cbe2e94943f4 Author: Florian Weimer Date: Thu Apr 25 12:56:48 2024 +0200 i386: ulp update for SSE2 --disable-multi-arch configurations (cherry picked from commit 3a3a4497421422aa854c855cbe5110ca7d598ffc) diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 84e6686eba..f2139fc172 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -1232,6 +1232,7 @@ ldouble: 6 Function: "hypot": double: 1 +float: 1 float128: 1 ldouble: 1 commit 2f8f157eb0cc7f1d8d9a3fcaa8c55bed53b092a8 Author: H.J. Lu Date: Tue Apr 23 13:59:50 2024 -0700 x86: Define MINIMUM_X86_ISA_LEVEL in config.h [BZ #31676] Define MINIMUM_X86_ISA_LEVEL at configure time to avoid /usr/bin/ld: …/build/elf/librtld.os: in function `init_cpu_features': …/git/elf/../sysdeps/x86/cpu-features.c:1202: undefined reference to `_dl_runtime_resolve_fxsave' /usr/bin/ld: …/build/elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object /usr/bin/ld: final link failed: bad value collect2: error: ld returned 1 exit status when glibc is built with -march=x86-64-v3 and configured with --with-rtld-early-cflags=-march=x86-64, which is used to allow ld.so to print an error message on unsupported CPUs: Fatal glibc error: CPU does not support x86-64-v3 This fixes BZ #31676. 
Reviewed-by: Sunil K Pandey (cherry picked from commit 46c999741340ea559784c20a45077955b50aca43) diff --git a/config.h.in b/config.h.in index 4d33c63a84..1e647de585 100644 --- a/config.h.in +++ b/config.h.in @@ -286,6 +286,9 @@ /* Define if x86 ISA level should be included in shared libraries. */ #undef INCLUDE_X86_ISA_LEVEL +/* The x86 ISA level. 1 for baseline. Undefined on non-x86. */ +#undef MINIMUM_X86_ISA_LEVEL + /* Define if -msahf is enabled by default on x86. */ #undef HAVE_X86_LAHF_SAHF diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure index 2a5421bb31..d28d9bcb29 100644 --- a/sysdeps/x86/configure +++ b/sysdeps/x86/configure @@ -151,6 +151,13 @@ printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; } else libc_cv_have_x86_isa_level=baseline fi +if test $libc_cv_have_x86_isa_level = baseline; then + printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h + +else + printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL $libc_cv_have_x86_isa_level" >>confdefs.h + +fi config_vars="$config_vars have-x86-isa-level = $libc_cv_have_x86_isa_level" config_vars="$config_vars diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac index 78ff7c8f41..5b0acd03d2 100644 --- a/sysdeps/x86/configure.ac +++ b/sysdeps/x86/configure.ac @@ -105,6 +105,11 @@ EOF else libc_cv_have_x86_isa_level=baseline fi +if test $libc_cv_have_x86_isa_level = baseline; then + AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1) +else + AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level) +fi LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h index 11fe1ca90c..2c7f74212b 100644 --- a/sysdeps/x86/isa-level.h +++ b/sysdeps/x86/isa-level.h @@ -61,8 +61,10 @@ # define __X86_ISA_V4 0 #endif -#define MINIMUM_X86_ISA_LEVEL \ +#ifndef MINIMUM_X86_ISA_LEVEL +# define 
MINIMUM_X86_ISA_LEVEL \ (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) +#endif /* Depending on the minimum ISA level, a feature check result can be a compile-time constant.. */ commit 1263d583d2e28afb8be53f8d6922f0842036f35d Author: Florian Weimer Date: Thu Apr 25 15:00:45 2024 +0200 CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup cache (bug 31677) Using alloca matches what other caches do. The request length is bounded by MAXKEYLEN. Reviewed-by: Carlos O'Donell (cherry picked from commit 87801a8fd06db1d654eea3e4f7626ff476a9bdaa) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index 0c6e46f15c..f227dc7fa2 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -502,12 +502,13 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, = (struct indataset *) mempool_alloc (db, sizeof (*dataset) + req->key_len, 1); - struct indataset dataset_mem; bool cacheable = true; if (__glibc_unlikely (dataset == NULL)) { cacheable = false; - dataset = &dataset_mem; + /* The alloca is safe because nscd_run_worker verfies that + key_len is not larger than MAXKEYLEN. */ + dataset = alloca (sizeof (*dataset) + req->key_len); } datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len, commit 5a508e0b508c8ad53bd0d2fb48fd71b242626341 Author: Florian Weimer Date: Thu Apr 25 15:01:07 2024 +0200 CVE-2024-33600: nscd: Do not send missing not-found response in addgetnetgrentX (bug 31678) If we failed to add a not-found response to the cache, the dataset point can be null, resulting in a null pointer dereference. Reviewed-by: Siddhesh Poyarekar (cherry picked from commit 7835b00dbce53c3c87bbbb1754a95fb5e58187aa) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index f227dc7fa2..c18fe111f3 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -147,7 +147,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, /* No such service. 
*/ cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, &key_copy); - goto writeout; + goto maybe_cache_add; } memset (&data, '\0', sizeof (data)); @@ -348,7 +348,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, { cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, &key_copy); - goto writeout; + goto maybe_cache_add; } total = buffilled; @@ -410,14 +410,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } if (he == NULL && fd != -1) - { - /* We write the dataset before inserting it to the database - since while inserting this thread might block and so would - unnecessarily let the receiver wait. */ - writeout: + /* We write the dataset before inserting it to the database since + while inserting this thread might block and so would + unnecessarily let the receiver wait. */ writeall (fd, &dataset->resp, dataset->head.recsize); - } + maybe_cache_add: if (cacheable) { /* If necessary, we also propagate the data to disk. */ commit c99f886de54446cd4447db6b44be93dabbdc2f8b Author: Florian Weimer Date: Thu Apr 25 15:01:07 2024 +0200 CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678) The addgetnetgrentX call in addinnetgrX may have failed to produce a result, so the result variable in addinnetgrX can be NULL. Use db->negtimeout as the fallback value if there is no result data; the timeout is also overwritten below. Also avoid sending a second not-found response. (The client disconnects after receiving the first response, so the data stream did not go out of sync even without this fix.) It is still beneficial to add the negative response to the mapping, so that the client can get it from there in the future, instead of going through the socket. 
Reviewed-by: Siddhesh Poyarekar (cherry picked from commit b048a482f088e53144d26a61c390bed0210f49f2) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index c18fe111f3..e22ffa5884 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -511,14 +511,15 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len, sizeof (innetgroup_response_header), - he == NULL ? 0 : dh->nreloads + 1, result->head.ttl); + he == NULL ? 0 : dh->nreloads + 1, + result == NULL ? db->negtimeout : result->head.ttl); /* Set the notfound status and timeout based on the result from getnetgrent. */ - dataset->head.notfound = result->head.notfound; + dataset->head.notfound = result == NULL || result->head.notfound; dataset->head.timeout = timeout; dataset->resp.version = NSCD_VERSION; - dataset->resp.found = result->resp.found; + dataset->resp.found = result != NULL && result->resp.found; /* Until we find a matching entry the result is 0. */ dataset->resp.result = 0; @@ -566,7 +567,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, goto out; } - if (he == NULL) + /* addgetnetgrentX may have already sent a notfound response. Do + not send another one. */ + if (he == NULL && dataset->resp.found) { /* We write the dataset before inserting it to the database since while inserting this thread might block and so would commit a9a8d3eebb145779a18d90e3966009a1daa63cd8 Author: Florian Weimer Date: Thu Apr 25 15:01:07 2024 +0200 CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680) This avoids potential memory corruption when the underlying NSS callback function does not use the buffer space to store all strings (e.g., for constant strings). Instead of custom buffer management, two scratch buffers are used. This increases stack usage somewhat. 
Scratch buffer allocation failure is handled by return -1 (an invalid timeout value) instead of terminating the process. This fixes bug 31679. Reviewed-by: Siddhesh Poyarekar (cherry picked from commit c04a21e050d64a1193a6daab872bca2528bda44b) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index e22ffa5884..e8fe041846 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "../nss/netgroup.h" #include "nscd.h" @@ -65,6 +66,16 @@ struct dataset char strdata[0]; }; +/* Send a notfound response to FD. Always returns -1 to indicate an + ephemeral error. */ +static time_t +send_notfound (int fd) +{ + if (fd != -1) + TEMP_FAILURE_RETRY (send (fd, ¬found, sizeof (notfound), MSG_NOSIGNAL)); + return -1; +} + /* Sends a notfound message and prepares a notfound dataset to write to the cache. Returns true if there was enough memory to allocate the dataset and returns the dataset in DATASETP, total bytes to write in TOTALP and the @@ -83,8 +94,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, total = sizeof (notfound); timeout = time (NULL) + db->negtimeout; - if (fd != -1) - TEMP_FAILURE_RETRY (send (fd, ¬found, total, MSG_NOSIGNAL)); + send_notfound (fd); dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1); /* If we cannot permanently store the result, so be it. */ @@ -109,11 +119,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, return cacheable; } +struct addgetnetgrentX_scratch +{ + /* This is the result that the caller should use. It can be NULL, + point into buffer, or it can be in the cache. */ + struct dataset *dataset; + + struct scratch_buffer buffer; + + /* Used internally in addgetnetgrentX as a staging area. */ + struct scratch_buffer tmp; + + /* Number of bytes in buffer that are actually used. 
*/ + size_t buffer_used; +}; + +static void +addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch) +{ + scratch->dataset = NULL; + scratch_buffer_init (&scratch->buffer); + scratch_buffer_init (&scratch->tmp); + + /* Reserve space for the header. */ + scratch->buffer_used = sizeof (struct dataset); + static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space), + "initial buffer space"); + memset (scratch->tmp.data, 0, sizeof (struct dataset)); +} + +static void +addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch) +{ + scratch_buffer_free (&scratch->buffer); + scratch_buffer_free (&scratch->tmp); +} + +/* Copy LENGTH bytes from S into SCRATCH. Returns NULL if SCRATCH + could not be resized, otherwise a pointer to the copy. */ +static char * +addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch, + const char *s, size_t length) +{ + while (true) + { + size_t remaining = scratch->buffer.length - scratch->buffer_used; + if (remaining >= length) + break; + if (!scratch_buffer_grow_preserve (&scratch->buffer)) + return NULL; + } + char *copy = scratch->buffer.data + scratch->buffer_used; + memcpy (copy, s, length); + scratch->buffer_used += length; + return copy; +} + +/* Copy S into SCRATCH, including its null terminator. Returns false + if SCRATCH could not be resized. */ +static bool +addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s) +{ + if (s == NULL) + s = ""; + return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL; +} + +/* Caller must initialize and free *SCRATCH. If the return value is + negative, this function has sent a notfound response. 
*/ static time_t addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, const char *key, uid_t uid, struct hashentry *he, - struct datahead *dh, struct dataset **resultp, - void **tofreep) + struct datahead *dh, struct addgetnetgrentX_scratch *scratch) { if (__glibc_unlikely (debug_level > 0)) { @@ -132,14 +209,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, char *key_copy = NULL; struct __netgrent data; - size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len); - size_t buffilled = sizeof (*dataset); - char *buffer = NULL; size_t nentries = 0; size_t group_len = strlen (key) + 1; struct name_list *first_needed = alloca (sizeof (struct name_list) + group_len); - *tofreep = NULL; if (netgroup_database == NULL && !__nss_database_get (nss_database_netgroup, &netgroup_database)) @@ -151,8 +224,6 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } memset (&data, '\0', sizeof (data)); - buffer = xmalloc (buflen); - *tofreep = buffer; first_needed->next = first_needed; memcpy (first_needed->name, key, group_len); data.needed_groups = first_needed; @@ -195,8 +266,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, while (1) { int e; - status = getfct.f (&data, buffer + buffilled, - buflen - buffilled - req->key_len, &e); + status = getfct.f (&data, scratch->tmp.data, + scratch->tmp.length, &e); if (status == NSS_STATUS_SUCCESS) { if (data.type == triple_val) @@ -204,68 +275,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, const char *nhost = data.val.triple.host; const char *nuser = data.val.triple.user; const char *ndomain = data.val.triple.domain; - - size_t hostlen = strlen (nhost ?: "") + 1; - size_t userlen = strlen (nuser ?: "") + 1; - size_t domainlen = strlen (ndomain ?: "") + 1; - - if (nhost == NULL || nuser == NULL || ndomain == NULL - || nhost > nuser || nuser > ndomain) - { - const char *last = nhost; - if (last == NULL - || (nuser != 
NULL && nuser > last)) - last = nuser; - if (last == NULL - || (ndomain != NULL && ndomain > last)) - last = ndomain; - - size_t bufused - = (last == NULL - ? buffilled - : last + strlen (last) + 1 - buffer); - - /* We have to make temporary copies. */ - size_t needed = hostlen + userlen + domainlen; - - if (buflen - req->key_len - bufused < needed) - { - buflen += MAX (buflen, 2 * needed); - /* Save offset in the old buffer. We don't - bother with the NULL check here since - we'll do that later anyway. */ - size_t nhostdiff = nhost - buffer; - size_t nuserdiff = nuser - buffer; - size_t ndomaindiff = ndomain - buffer; - - char *newbuf = xrealloc (buffer, buflen); - /* Fix up the triplet pointers into the new - buffer. */ - nhost = (nhost ? newbuf + nhostdiff - : NULL); - nuser = (nuser ? newbuf + nuserdiff - : NULL); - ndomain = (ndomain ? newbuf + ndomaindiff - : NULL); - *tofreep = buffer = newbuf; - } - - nhost = memcpy (buffer + bufused, - nhost ?: "", hostlen); - nuser = memcpy ((char *) nhost + hostlen, - nuser ?: "", userlen); - ndomain = memcpy ((char *) nuser + userlen, - ndomain ?: "", domainlen); - } - - char *wp = buffer + buffilled; - wp = memmove (wp, nhost ?: "", hostlen); - wp += hostlen; - wp = memmove (wp, nuser ?: "", userlen); - wp += userlen; - wp = memmove (wp, ndomain ?: "", domainlen); - wp += domainlen; - buffilled = wp - buffer; + if (!(addgetnetgrentX_append (scratch, nhost) + && addgetnetgrentX_append (scratch, nuser) + && addgetnetgrentX_append (scratch, ndomain))) + return send_notfound (fd); ++nentries; } else @@ -317,8 +330,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) { - buflen *= 2; - *tofreep = buffer = xrealloc (buffer, buflen); + if (!scratch_buffer_grow (&scratch->tmp)) + return send_notfound (fd); } else if (status == NSS_STATUS_RETURN || status == NSS_STATUS_NOTFOUND @@ -351,10 +364,17 @@ addgetnetgrentX (struct database_dyn *db, int fd, 
request_header *req, goto maybe_cache_add; } - total = buffilled; + /* Capture the result size without the key appended. */ + total = scratch->buffer_used; + + /* Make a copy of the key. The scratch buffer must not move after + this point. */ + key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len); + if (key_copy == NULL) + return send_notfound (fd); /* Fill in the dataset. */ - dataset = (struct dataset *) buffer; + dataset = scratch->buffer.data; timeout = datahead_init_pos (&dataset->head, total + req->key_len, total - offsetof (struct dataset, resp), he == NULL ? 0 : dh->nreloads + 1, @@ -363,11 +383,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, dataset->resp.version = NSCD_VERSION; dataset->resp.found = 1; dataset->resp.nresults = nentries; - dataset->resp.result_len = buffilled - sizeof (*dataset); - - assert (buflen - buffilled >= req->key_len); - key_copy = memcpy (buffer + buffilled, key, req->key_len); - buffilled += req->key_len; + dataset->resp.result_len = total - sizeof (*dataset); /* Now we can determine whether on refill we have to create a new record or not. */ @@ -398,7 +414,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, if (__glibc_likely (newp != NULL)) { /* Adjust pointer into the memory block. */ - key_copy = (char *) newp + (key_copy - buffer); + key_copy = (char *) newp + (key_copy - (char *) dataset); dataset = memcpy (newp, dataset, total + req->key_len); cacheable = true; @@ -439,7 +455,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } out: - *resultp = dataset; + scratch->dataset = dataset; return timeout; } @@ -460,6 +476,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, if (user != NULL) key = strchr (key, '\0') + 1; const char *domain = *key++ ? 
key : NULL; + struct addgetnetgrentX_scratch scratch; + + addgetnetgrentX_scratch_init (&scratch); if (__glibc_unlikely (debug_level > 0)) { @@ -475,12 +494,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, group, group_len, db, uid); time_t timeout; - void *tofree; if (result != NULL) - { - timeout = result->head.timeout; - tofree = NULL; - } + timeout = result->head.timeout; else { request_header req_get = @@ -489,7 +504,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, .key_len = group_len }; timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL, - &result, &tofree); + &scratch); + result = scratch.dataset; + if (timeout < 0) + goto out; } struct indataset @@ -603,7 +621,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, } out: - free (tofree); + addgetnetgrentX_scratch_free (&scratch); return timeout; } @@ -613,11 +631,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req, const char *key, uid_t uid, struct hashentry *he, struct datahead *dh) { - struct dataset *ignore; - void *tofree; - time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, - &ignore, &tofree); - free (tofree); + struct addgetnetgrentX_scratch scratch; + addgetnetgrentX_scratch_init (&scratch); + time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch); + addgetnetgrentX_scratch_free (&scratch); + if (timeout < 0) + timeout = 0; return timeout; } @@ -661,5 +680,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he, .key_len = he->len }; - return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh); + int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, + he, dh); + if (timeout < 0) + timeout = 0; + return timeout; } commit fd658f026f25cf59e8db243bc3b3e09cd5a20ba0 Author: H.J. 
Lu Date: Thu Apr 25 08:06:52 2024 -0700 elf: Also compile dl-misc.os with $(rtld-early-cflags) Also compile dl-misc.os with $(rtld-early-cflags) to avoid Program received signal SIGILL, Illegal instruction. 0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2", endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156 156 bool positive = true; (gdb) bt #0 0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2", endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156 #1 0x00007ffff7fdb1a9 in tunable_initialize ( cur=cur@entry=0x7ffff7ffbc00 , strval=strval@entry=0x7fffffffe2c9 "2", len=len@entry=1) at dl-tunables.c:131 #2 0x00007ffff7fdb3a2 in parse_tunables (valstring=) at dl-tunables.c:258 #3 0x00007ffff7fdb5d9 in __GI___tunables_init (envp=0x7fffffffdd58) at dl-tunables.c:288 #4 0x00007ffff7fe44c3 in _dl_sysdep_start ( start_argptr=start_argptr@entry=0x7fffffffdcb0, dl_main=dl_main@entry=0x7ffff7fe5f80 ) at ../sysdeps/unix/sysv/linux/dl-sysdep.c:110 #5 0x00007ffff7fe5cae in _dl_start_final (arg=0x7fffffffdcb0) at rtld.c:494 #6 _dl_start (arg=0x7fffffffdcb0) at rtld.c:581 #7 0x00007ffff7fe4b38 in _start () (gdb) when setting GLIBC_TUNABLES in glibc compiled with APX. Reviewed-by: Florian Weimer (cherry picked from commit 049b7684c912dd32b67b1b15b0f43bf07d5f512e) diff --git a/elf/Makefile b/elf/Makefile index 69aa423c4b..a50a988e73 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -170,6 +170,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os)) CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines)) # Add the requested compiler flags to the early startup code. 
+CFLAGS-dl-misc.os += $(rtld-early-cflags) CFLAGS-dl-printf.os += $(rtld-early-cflags) CFLAGS-dl-setup_hash.os += $(rtld-early-cflags) CFLAGS-dl-sysdep.os += $(rtld-early-cflags) commit 9831f98c266a8d56d1bf729b709c08e40375540c Author: Florian Weimer Date: Fri Apr 19 14:38:17 2024 +0200 login: Check default sizes of structs utmp, utmpx, lastlog The default is for ports with a 64-bit time_t. Ports with a 32-bit time_t or with __WORDSIZE_TIME64_COMPAT32=1 need to override it. Reviewed-by: Adhemerval Zanella (cherry picked from commit 4d4da5aab936504b2d3eca3146e109630d9093c4) diff --git a/login/Makefile b/login/Makefile index 1e22008a61..b26ac42bfc 100644 --- a/login/Makefile +++ b/login/Makefile @@ -44,7 +44,7 @@ subdir-dirs = programs vpath %.c programs tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \ - tst-pututxline-lockfail tst-pututxline-cache + tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size # Empty compatibility library for old binaries. extra-libs := libutil diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c new file mode 100644 index 0000000000..1b7f7ff042 --- /dev/null +++ b/login/tst-utmp-size.c @@ -0,0 +1,33 @@ +/* Check expected sizes of struct utmp, struct utmpx, struct lastlog. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include + +static int +do_test (void) +{ + _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size"); + _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size"); + _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE, + "struct lastlog size"); + return 0; +} + +#include diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h new file mode 100644 index 0000000000..a247fcd3da --- /dev/null +++ b/sysdeps/arc/utmp-size.h @@ -0,0 +1,3 @@ +/* arc has less padding than other architectures with 64-bit time_t. */ +#define UTMP_SIZE 392 +#define LASTLOG_SIZE 296 diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/arm/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/csky/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h new file mode 100644 index 0000000000..89dbe878b0 --- /dev/null +++ b/sysdeps/generic/utmp-size.h @@ -0,0 +1,23 @@ +/* Expected sizes of utmp-related structures stored in files. 64-bit version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Expected size, in bytes, of struct utmp and struct utmpx. */ +#define UTMP_SIZE 400 + +/* Expected size, in bytes, of struct lastlog. */ +#define LASTLOG_SIZE 296 diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/hppa/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h new file mode 100644 index 0000000000..5946685819 --- /dev/null +++ b/sysdeps/m68k/utmp-size.h @@ -0,0 +1,3 @@ +/* m68k has 2-byte alignment. */ +#define UTMP_SIZE 382 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/microblaze/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/mips/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/nios2/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h new file mode 100644 index 0000000000..6b3653aa4d --- /dev/null +++ b/sysdeps/or1k/utmp-size.h @@ -0,0 +1,3 @@ +/* or1k has less padding than other architectures with 64-bit time_t. 
*/ +#define UTMP_SIZE 392 +#define LASTLOG_SIZE 296 diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/powerpc/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/riscv/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/sh/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/sparc/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h new file mode 100644 index 0000000000..8f21ebe1b6 --- /dev/null +++ b/sysdeps/x86/utmp-size.h @@ -0,0 +1,2 @@ +#define UTMP_SIZE 384 +#define LASTLOG_SIZE 292 commit 836d43b98973e0845b739ff5d3aad3af09dc7d0f Author: Florian Weimer Date: Fri Apr 19 14:38:17 2024 +0200 login: structs utmp, utmpx, lastlog _TIME_BITS independence (bug 30701) These structs describe file formats under /var/log, and should not depend on the definition of _TIME_BITS. This is achieved by defining __WORDSIZE_TIME64_COMPAT32 to 1 on 32-bit ports that support 32-bit time_t values (where __time_t is 32 bits). 
Reviewed-by: Adhemerval Zanella (cherry picked from commit 9abdae94c7454c45e02e97e4ed1eb1b1915d13d8) diff --git a/bits/wordsize.h b/bits/wordsize.h index 14edae3a11..53013a9275 100644 --- a/bits/wordsize.h +++ b/bits/wordsize.h @@ -21,7 +21,9 @@ #define __WORDSIZE32_PTRDIFF_LONG /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in - struct lastlog and struct utmp{,x} on 64-bit ports. This may be done in + struct lastlog and struct utmp{,x}. This may be done in order to make 64-bit ports compatible with 32-bit ports. Set to 0 for - 64-bit ports where the time types are 64-bits or for any 32-bit ports. */ + 64-bit ports where the time types are 64-bits and new 32-bit ports + where time_t is 64 bits, and there is no companion architecture with + 32-bit time_t. */ #define __WORDSIZE_TIME64_COMPAT32 diff --git a/login/Makefile b/login/Makefile index b26ac42bfc..f91190e3dc 100644 --- a/login/Makefile +++ b/login/Makefile @@ -44,7 +44,9 @@ subdir-dirs = programs vpath %.c programs tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \ - tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size + tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64 + +CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 # Empty compatibility library for old binaries. extra-libs := libutil diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c new file mode 100644 index 0000000000..7a581a4c12 --- /dev/null +++ b/login/tst-utmp-size-64.c @@ -0,0 +1,2 @@ +/* The on-disk layout must not change in time64 mode. */ +#include "tst-utmp-size.c" diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/arm/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/csky/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>.
*/ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/m68k/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/microblaze/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h index 57f0f2a22f..30dd3fd85d 100644 --- a/sysdeps/mips/bits/wordsize.h +++ b/sysdeps/mips/bits/wordsize.h @@ -19,11 +19,7 @@ #define __WORDSIZE _MIPS_SZPTR -#if _MIPS_SIM == _ABI64 -# define __WORDSIZE_TIME64_COMPAT32 1 -#else -# define __WORDSIZE_TIME64_COMPAT32 0 -#endif +#define __WORDSIZE_TIME64_COMPAT32 1 #if __WORDSIZE == 32 #define __WORDSIZE32_SIZE_ULONG 0 diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/nios2/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>.
*/ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h index 04ca9debf0..6993fb6b29 100644 --- a/sysdeps/powerpc/powerpc32/bits/wordsize.h +++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h @@ -2,10 +2,9 @@ #if defined __powerpc64__ # define __WORDSIZE 64 -# define __WORDSIZE_TIME64_COMPAT32 1 #else # define __WORDSIZE 32 -# define __WORDSIZE_TIME64_COMPAT32 0 # define __WORDSIZE32_SIZE_ULONG 0 # define __WORDSIZE32_PTRDIFF_LONG 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h index 04ca9debf0..6993fb6b29 100644 --- a/sysdeps/powerpc/powerpc64/bits/wordsize.h +++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h @@ -2,10 +2,9 @@ #if defined __powerpc64__ # define __WORDSIZE 64 -# define __WORDSIZE_TIME64_COMPAT32 1 #else # define __WORDSIZE 32 -# define __WORDSIZE_TIME64_COMPAT32 0 # define __WORDSIZE32_SIZE_ULONG 0 # define __WORDSIZE32_PTRDIFF_LONG 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/sh/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h index 4bbd2e63b4..a2e79e0fa9 100644 --- a/sysdeps/sparc/sparc32/bits/wordsize.h +++ b/sysdeps/sparc/sparc32/bits/wordsize.h @@ -1,6 +1,6 @@ /* Determine the wordsize from the preprocessor defines. */ #define __WORDSIZE 32 -#define __WORDSIZE_TIME64_COMPAT32 0 +#define __WORDSIZE_TIME64_COMPAT32 1 #define __WORDSIZE32_SIZE_ULONG 0 #define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h index 2f66f10d72..ea103e5970 100644 --- a/sysdeps/sparc/sparc64/bits/wordsize.h +++ b/sysdeps/sparc/sparc64/bits/wordsize.h @@ -2,10 +2,9 @@ #if defined __arch64__ || defined __sparcv9 # define __WORDSIZE 64 -# define __WORDSIZE_TIME64_COMPAT32 1 #else # define __WORDSIZE 32 -# define __WORDSIZE_TIME64_COMPAT32 0 # define __WORDSIZE32_SIZE_ULONG 0 # define __WORDSIZE32_PTRDIFF_LONG 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h new file mode 100644 index 0000000000..6ecbfe7c86 --- /dev/null +++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 1 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h index 04ca9debf0..6993fb6b29 100644 --- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h +++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h @@ -2,10 +2,9 @@ #if defined __powerpc64__ # define __WORDSIZE 64 -# define __WORDSIZE_TIME64_COMPAT32 1 #else # define __WORDSIZE 32 -# define __WORDSIZE_TIME64_COMPAT32 0 # define __WORDSIZE32_SIZE_ULONG 0 # define __WORDSIZE32_PTRDIFF_LONG 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h index 7562875ee2..ea103e5970 100644 --- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h +++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h @@ -2,10 +2,9 @@ #if defined __arch64__ || defined __sparcv9 # define __WORDSIZE 64 -# define __WORDSIZE_TIME64_COMPAT32 1 #else # define __WORDSIZE 32 # define __WORDSIZE32_SIZE_ULONG 0 # define __WORDSIZE32_PTRDIFF_LONG 0 -# define __WORDSIZE_TIME64_COMPAT32 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h index 70f652bca1..3f40aa76f9 100644 --- a/sysdeps/x86/bits/wordsize.h +++ b/sysdeps/x86/bits/wordsize.h @@ -8,10 +8,9 @@ #define __WORDSIZE32_PTRDIFF_LONG 0 #endif +#define __WORDSIZE_TIME64_COMPAT32 1 + #ifdef __x86_64__ -# define __WORDSIZE_TIME64_COMPAT32 1 /* Both x86-64 and x32 use the 64-bit system call
interface. */ # define __SYSCALL_WORDSIZE 64 -#else -# define __WORDSIZE_TIME64_COMPAT32 0 #endif commit acc56074b0a5127631a64640aef1b7c5c103ebd8 Author: Florian Weimer Date: Thu May 2 17:06:19 2024 +0200 nscd: Use time_t for return type of addgetnetgrentX Using int may give false results for future dates (timeouts after the year 2028). Fixes commit 04a21e050d64a1193a6daab872bca2528bda44b ("CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680)"). Reviewed-by: Carlos O'Donell (cherry picked from commit 4bbca1a44691a6e9adcee5c6798a707b626bc331) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index e8fe041846..01d554af9c 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -680,8 +680,8 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he, .key_len = he->len }; - int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, - he, dh); + time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, + he, dh); if (timeout < 0) timeout = 0; return timeout; commit 273a835fe7c685cc54266bb8b502787bad5e9bae Author: Carlos O'Donell Date: Tue Apr 23 13:30:37 2024 -0400 time: Allow later version licensing. The FSF's Licensing and Compliance Lab noted a discrepancy in the licensing of several files in the glibc package. When timespec_get.c was implemented the license did not include the standard ", or (at your option) any later version." text. Change the license in timespec_get.c and all copied files to match the expected license. This change was previously approved in principle by the FSF in RT ticket #1316403. And a similar instance was fixed in commit 46703efa02f6ddebce5ee54c92f7c32598de0de6.
(cherry picked from commit 91695ee4598b39d181ab8df579b888a8863c4cab) diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c index c6e5e66289..778d1e3354 100644 --- a/sysdeps/unix/sysv/linux/timespec_get.c +++ b/sysdeps/unix/sysv/linux/timespec_get.c @@ -5,7 +5,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c index 5acebe2a2c..2eef9e512c 100644 --- a/sysdeps/unix/sysv/linux/timespec_getres.c +++ b/sysdeps/unix/sysv/linux/timespec_getres.c @@ -5,7 +5,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/time/timespec_get.c b/time/timespec_get.c index b031e42ca2..26a044bca6 100644 --- a/time/timespec_get.c +++ b/time/timespec_get.c @@ -4,7 +4,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. 
The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/time/timespec_getres.c b/time/timespec_getres.c index edb397507c..2e18b8bcac 100644 --- a/time/timespec_getres.c +++ b/time/timespec_getres.c @@ -5,7 +5,7 @@ The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2.1 of the License. + version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of commit 3148714ab61ad61281bae5a30f530d637034ac3b Author: Gabi Falk Date: Tue Apr 30 20:05:02 2024 +0000 i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here After this change, the static library built for i586, regardless of PIC options, contains implementations of these functions respectively from sysdeps/i386/memcpy_chk.S and 
sysdeps/i386/mempcpy_chk.S. This ensures that memcpy and mempcpy won't pull in __chk_fail and the routines it calls. Reported-by: Florian Weimer Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. Levin (cherry picked from commit 789894a2f554d4503ecb2f13b2b4e93e43414f33) diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S index 3e26f112d6..79856d498a 100644 --- a/sysdeps/i386/i586/memcpy.S +++ b/sysdeps/i386/i586/memcpy.S @@ -26,7 +26,7 @@ #define LEN SRC+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY (__memcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit ad92c483a4bd34db1cfb3eb625212ea64848244f Author: Gabi Falk Date: Tue Apr 30 20:05:03 2024 +0000 i686: Fix multiple definitions of __memmove_chk and __memset_chk Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and mempcpy.S, but omitted memmove.S and memset.S. As a result, the static library built as PIC, whether with or without multiarch support, contains two definitions for each of the __memmove_chk and __memset_chk symbols. /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk': /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here After this change, regardless of PIC options, the static library, built for i686 with multiarch contains implementations of these functions respectively from debug/memmove_chk.c and debug/memset_chk.c, and without multiarch contains implementations of these functions respectively from sysdeps/i386/memmove_chk.S and sysdeps/i386/memset_chk.S. 
This ensures that memmove and memset won't pull in __chk_fail and the routines it calls. Reported-by: Sam James Tested-by: Sam James Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch") Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. Levin (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9) diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S index f230359ad6..effd958120 100644 --- a/sysdeps/i386/i686/memmove.S +++ b/sysdeps/i386/i686/memmove.S @@ -29,7 +29,7 @@ #define SRC DEST+4 #define LEN SRC+4 -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memmove_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index f02f5a6df7..ab06771ea0 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -27,7 +27,7 @@ #define LEN CHR+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit ff110b2591f0bdeccd121c3726af19c62d6fb184 Author: Gabi Falk Date: Tue Apr 30 20:05:04 2024 +0000 Add a test to check for duplicate definitions in the static library This change follows two previous fixes addressing multiple definitions of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk and __memset_chk functions on i686. The test is intended to prevent such issues from occurring in the future. Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. Levin (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314) diff --git a/Makefile b/Makefile index 7052b46df8..2e351c0321 100644 --- a/Makefile +++ b/Makefile @@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \ $(evaluate-test) +# Link libc.a as a whole to verify that it does not contain multiple +# definitions of any symbols. 
+tests-special += $(objpfx)link-static-libc.out +$(objpfx)link-static-libc.out: + $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ + $(evaluate-test) + # Print test summary for tests in $1 .sum file; # $2 is optional test identifier. # Fail if there are unexpected failures in the test results. commit fa616ea3730cb42046d19f28d611be0bc390af7c Author: Sam James Date: Sat May 4 13:28:13 2024 +0100 Revert "Add a test to check for duplicate definitions in the static library" This reverts commit ff110b2591f0bdeccd121c3726af19c62d6fb184. I had the wrong cherry-pick reference (the commit content is right; it's just referring to a base that isn't upstream), but let's revert and reapply for clarity. Signed-off-by: Sam James diff --git a/Makefile b/Makefile index 2e351c0321..7052b46df8 100644 --- a/Makefile +++ b/Makefile @@ -577,13 +577,6 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \ $(evaluate-test) -# Link libc.a as a whole to verify that it does not contain multiple -# definitions of any symbols. -tests-special += $(objpfx)link-static-libc.out -$(objpfx)link-static-libc.out: - $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ - $(evaluate-test) - # Print test summary for tests in $1 .sum file; # $2 is optional test identifier. # Fail if there are unexpected failures in the test results. commit c16871e662cd0f3370173d916864b19e69f1bc9a Author: Sam James Date: Sat May 4 13:28:51 2024 +0100 Revert "i686: Fix multiple definitions of __memmove_chk and __memset_chk" This reverts commit ad92c483a4bd34db1cfb3eb625212ea64848244f. I had the wrong cherry-pick reference (the commit content is right; it's just referring to a base that isn't upstream), but let's revert and reapply for clarity. 
Signed-off-by: Sam James diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S index effd958120..f230359ad6 100644 --- a/sysdeps/i386/i686/memmove.S +++ b/sysdeps/i386/i686/memmove.S @@ -29,7 +29,7 @@ #define SRC DEST+4 #define LEN SRC+4 -#if defined SHARED && IS_IN (libc) +#if defined PIC && IS_IN (libc) ENTRY_CHK (__memmove_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index ab06771ea0..f02f5a6df7 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -27,7 +27,7 @@ #define LEN CHR+4 .text -#if defined SHARED && IS_IN (libc) +#if defined PIC && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit 5141d4d83c17406f0eaea3e345ef2b52e10f386e Author: Sam James Date: Sat May 4 13:28:54 2024 +0100 Revert "i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk" This reverts commit 3148714ab61ad61281bae5a30f530d637034ac3b. I had the wrong cherry-pick reference (the commit content is right; it's just referring to a base that isn't upstream), but let's revert and reapply for clarity. 
Signed-off-by: Sam James diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S index 79856d498a..3e26f112d6 100644 --- a/sysdeps/i386/i586/memcpy.S +++ b/sysdeps/i386/i586/memcpy.S @@ -26,7 +26,7 @@ #define LEN SRC+4 .text -#if defined SHARED && IS_IN (libc) +#if defined PIC && IS_IN (libc) ENTRY (__memcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit 8323a83abd73446dc434aceff66219712c09140b Author: Gabi Falk Date: Tue Apr 30 20:05:02 2024 +0000 i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here After this change, the static library built for i586, regardless of PIC options, contains implementations of these functions respectively from sysdeps/i386/memcpy_chk.S and sysdeps/i386/mempcpy_chk.S. This ensures that memcpy and mempcpy won't pull in __chk_fail and the routines it calls. Reported-by: Florian Weimer Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. 
Levin (cherry picked from commit 0fdf4ba48ccce5abf567340b0ab8fa8ed8a9bc6e) diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S index 3e26f112d6..79856d498a 100644 --- a/sysdeps/i386/i586/memcpy.S +++ b/sysdeps/i386/i586/memcpy.S @@ -26,7 +26,7 @@ #define LEN SRC+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY (__memcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit 8b005d7869debac4d5cd67f65e49a0fad89da9ad Author: Gabi Falk Date: Tue Apr 30 20:05:03 2024 +0000 i686: Fix multiple definitions of __memmove_chk and __memset_chk Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and mempcpy.S, but omitted memmove.S and memset.S. As a result, the static library built as PIC, whether with or without multiarch support, contains two definitions for each of the __memmove_chk and __memset_chk symbols. /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk': /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here After this change, regardless of PIC options, the static library, built for i686 with multiarch contains implementations of these functions respectively from debug/memmove_chk.c and debug/memset_chk.c, and without multiarch contains implementations of these functions respectively from sysdeps/i386/memmove_chk.S and sysdeps/i386/memset_chk.S. This ensures that memmove and memset won't pull in __chk_fail and the routines it calls. Reported-by: Sam James Tested-by: Sam James Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch") Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. 
Levin (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9) diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S index f230359ad6..effd958120 100644 --- a/sysdeps/i386/i686/memmove.S +++ b/sysdeps/i386/i686/memmove.S @@ -29,7 +29,7 @@ #define SRC DEST+4 #define LEN SRC+4 -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memmove_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index f02f5a6df7..ab06771ea0 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -27,7 +27,7 @@ #define LEN CHR+4 .text -#if defined PIC && IS_IN (libc) +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) commit f8e462342189525e4605cf233b8f798d1c7f398d Author: Gabi Falk Date: Tue Apr 30 20:05:04 2024 +0000 Add a test to check for duplicate definitions in the static library This change follows two previous fixes addressing multiple definitions of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk and __memset_chk functions on i686. The test is intended to prevent such issues from occurring in the future. Signed-off-by: Gabi Falk Reviewed-by: H.J. Lu Reviewed-by: Dmitry V. Levin (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314) diff --git a/Makefile b/Makefile index 7052b46df8..2e351c0321 100644 --- a/Makefile +++ b/Makefile @@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \ $(evaluate-test) +# Link libc.a as a whole to verify that it does not contain multiple +# definitions of any symbols. +tests-special += $(objpfx)link-static-libc.out +$(objpfx)link-static-libc.out: + $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ + $(evaluate-test) + # Print test summary for tests in $1 .sum file; # $2 is optional test identifier. 
# Fail if there are unexpected failures in the test results.