-
Notifications
You must be signed in to change notification settings - Fork 102
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
commit 8939c58 upstream. xtensa gcc-13 has changed multiplication handling and may now use __umulsidi3 helper where it used to use __muldi3. As a result building the kernel with the new gcc may fail with the following error: linux/init/main.c:1287: undefined reference to `__umulsidi3' Fix the build by providing __umulsidi3 implementation for xtensa. Cc: stable@vger.kernel.org # 5.18+ Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- Loading branch information
Showing
3 changed files
with
233 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ | ||
#include <linux/linkage.h> | ||
#include <asm/asmmacro.h> | ||
#include <asm/core.h> | ||
|
||
/* XCHAL_NO_MUL is 1 only when the core has none of the MUL16, MUL32 | ||
and MAC16 options. It is explicitly defined to 0 otherwise, because | ||
it is tested by value ("#elif XCHAL_NO_MUL", "#if XCHAL_NO_MUL") and | ||
testing an undefined macro triggers -Wundef warnings. */ | ||
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16 | ||
#define XCHAL_NO_MUL 0 | ||
#else | ||
#define XCHAL_NO_MUL 1 | ||
#endif | ||
|
||
ENTRY(__umulsidi3) /* unsigned 32 x 32 -> 64-bit multiply: operands in a2 and a3, product returned in wh:wl */ | ||
|
||
#ifdef __XTENSA_CALL0_ABI__ | ||
abi_entry(32) | ||
s32i a12, sp, 16 /* a12-a15 are spilled at sp+16..sp+28 and reloaded before abi_ret(32) */ | ||
s32i a13, sp, 20 | ||
s32i a14, sp, 24 | ||
s32i a15, sp, 28 | ||
#elif XCHAL_NO_MUL | ||
/* This is not really a leaf function; allocate enough stack space | ||
to allow CALL12s to a helper function (.Lmul_mulsi3, reached | ||
through the do_mul macro). */ | ||
abi_entry(32) | ||
#else | ||
abi_entry_default | ||
#endif | ||
|
||
#ifdef __XTENSA_EB__ | ||
#define wh a2 /* high word of the product */ | ||
#define wl a3 /* low word of the product */ | ||
#else | ||
#define wh a3 /* high word of the product */ | ||
#define wl a2 /* low word of the product */ | ||
#endif /* __XTENSA_EB__ */ | ||
|
||
/* This code is taken from the mulsf3 routine in ieee754-sf.S. | ||
See more comments there. | ||
Overview: with MUL32_HIGH the product is formed directly by mull | ||
(low word) and muluh (high word). Otherwise it is assembled from | ||
four 16 x 16 partial products pp0..pp3, each issued through | ||
do_mul(dst, xreg, xhalf, yreg, yhalf), which multiplies the l(ow) or | ||
h(igh) 16-bit half of xreg by the selected half of yreg and leaves | ||
the result in dst. a6, a9 and a11 serve as temporaries. */ | ||
|
||
#if XCHAL_HAVE_MUL32_HIGH | ||
mull a6, a2, a3 /* low word is staged in a6: wl aliases an input that muluh still reads */ | ||
muluh wh, a2, a3 | ||
mov wl, a6 | ||
|
||
#else /* ! MUL32_HIGH */ | ||
|
||
#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL | ||
/* a0 and a8 will be clobbered by calling the multiply function | ||
but a8 is not used here and need not be saved. */ | ||
s32i a0, sp, 0 | ||
#endif | ||
|
||
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 | ||
|
||
#define a2h a4 | ||
#define a3h a5 | ||
|
||
/* Get the high halves of the inputs into registers (a4 and a5, | ||
named a2h and a3h so that do_mul can paste the half suffix). */ | ||
srli a2h, a2, 16 | ||
srli a3h, a3, 16 | ||
|
||
#define a2l a2 | ||
#define a3l a3 | ||
|
||
#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 | ||
/* Clear the high halves of the inputs. This does not matter | ||
for MUL16 because the high bits are ignored. */ | ||
extui a2, a2, 0, 16 | ||
extui a3, a3, 0, 16 | ||
#endif | ||
#endif /* MUL16 || MUL32 */ | ||
|
||
|
||
#if XCHAL_HAVE_MUL16 | ||
|
||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | ||
mul16u dst, xreg ## xhalf, yreg ## yhalf | ||
|
||
#elif XCHAL_HAVE_MUL32 | ||
|
||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | ||
mull dst, xreg ## xhalf, yreg ## yhalf | ||
|
||
#elif XCHAL_HAVE_MAC16 | ||
|
||
/* The preprocessor insists on inserting a space when concatenating after | ||
a period in the definition of do_mul below. These macros are a workaround | ||
using underscores instead of periods when doing the concatenation. */ | ||
#define umul_aa_ll umul.aa.ll | ||
#define umul_aa_lh umul.aa.lh | ||
#define umul_aa_hl umul.aa.hl | ||
#define umul_aa_hh umul.aa.hh | ||
|
||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | ||
umul_aa_ ## xhalf ## yhalf xreg, yreg; \ | ||
rsr dst, ACCLO | ||
|
||
#else /* no multiply hardware: do_mul calls the .Lmul_mulsi3 software routine */ | ||
|
||
#define set_arg_l(dst, src) \ | ||
extui dst, src, 0, 16 | ||
#define set_arg_h(dst, src) \ | ||
srli dst, src, 16 | ||
|
||
#ifdef __XTENSA_CALL0_ABI__ | ||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | ||
set_arg_ ## xhalf (a13, xreg); \ | ||
set_arg_ ## yhalf (a14, yreg); \ | ||
call0 .Lmul_mulsi3; \ | ||
mov dst, a12 | ||
#else | ||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | ||
set_arg_ ## xhalf (a14, xreg); \ | ||
set_arg_ ## yhalf (a15, yreg); \ | ||
call12 .Lmul_mulsi3; \ | ||
mov dst, a14 | ||
#endif /* __XTENSA_CALL0_ABI__ */ | ||
|
||
#endif /* no multiply hardware */ | ||
|
||
/* Add pp1 and pp2 into a6 with carry-out in a9. */ | ||
do_mul(a6, a2, l, a3, h) /* pp 1 */ | ||
do_mul(a11, a2, h, a3, l) /* pp 2 */ | ||
movi a9, 0 | ||
add a6, a6, a11 | ||
bgeu a6, a11, 1f /* sum >= addend means no unsigned wrap-around: skip the carry */ | ||
addi a9, a9, 1 | ||
1: | ||
/* Shift the high half of a9/a6 into position in a9. Note that | ||
this value can be safely incremented without any carry-outs. | ||
With the shift amount set to 16 by ssai, src leaves | ||
(a9 << 16) | (a6 >> 16) in a9. */ | ||
ssai 16 | ||
src a9, a9, a6 | ||
|
||
/* Compute the low word into a6. */ | ||
do_mul(a11, a2, l, a3, l) /* pp 0 */ | ||
sll a6, a6 /* relies on the shift amount still set by ssai 16: low half of pp1 + pp2 moves to the upper 16 bits */ | ||
add a6, a6, a11 | ||
bgeu a6, a11, 1f /* no wrap-around: nothing to carry into the high word */ | ||
addi a9, a9, 1 | ||
1: | ||
/* Compute the high word into wh. */ | ||
do_mul(wh, a2, h, a3, h) /* pp 3 */ | ||
add wh, wh, a9 | ||
mov wl, a6 /* written last: wl aliases an input register that the do_mul calls above still read */ | ||
|
||
#endif /* !MUL32_HIGH */ | ||
|
||
#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL | ||
/* Restore the original return address. */ | ||
l32i a0, sp, 0 | ||
#endif | ||
#ifdef __XTENSA_CALL0_ABI__ | ||
l32i a12, sp, 16 /* reload a12-a15 from the slots filled at entry */ | ||
l32i a13, sp, 20 | ||
l32i a14, sp, 24 | ||
l32i a15, sp, 28 | ||
abi_ret(32) | ||
#else | ||
abi_ret_default | ||
#endif | ||
|
||
#if XCHAL_NO_MUL | ||
|
||
.macro do_addx2 dst, as, at, tmp /* dst = (as << 1) + at; tmp is written only when ADDX is unavailable */ | ||
#if XCHAL_HAVE_ADDX | ||
addx2 \dst, \as, \at | ||
#else | ||
slli \tmp, \as, 1 | ||
add \dst, \tmp, \at | ||
#endif | ||
.endm | ||
|
||
.macro do_addx4 dst, as, at, tmp /* dst = (as << 2) + at; tmp is written only when ADDX is unavailable */ | ||
#if XCHAL_HAVE_ADDX | ||
addx4 \dst, \as, \at | ||
#else | ||
slli \tmp, \as, 2 | ||
add \dst, \tmp, \at | ||
#endif | ||
.endm | ||
|
||
.macro do_addx8 dst, as, at, tmp /* dst = (as << 3) + at; tmp is written only when ADDX is unavailable */ | ||
#if XCHAL_HAVE_ADDX | ||
addx8 \dst, \as, \at | ||
#else | ||
slli \tmp, \as, 3 | ||
add \dst, \tmp, \at | ||
#endif | ||
.endm | ||
|
||
/* For Xtensa processors with no multiply hardware, this simplified | ||
version of _mulsi3 is used by do_mul for multiplying the 16-bit | ||
halves of the __umulsidi3 operands. When using CALL0, this function | ||
uses a custom ABI: the inputs are passed in a13 and a14, the | ||
result is returned in a12, and a8 and a15 are clobbered; the input | ||
registers a13 and a14 are themselves shifted by the loop and do | ||
not keep their values. Otherwise it is entered with CALL12, takes | ||
its inputs in a2 and a3 and returns the result in a2. */ | ||
.align 4 | ||
.Lmul_mulsi3: | ||
abi_entry_default | ||
|
||
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 /* shift-and-add loop: dst = src1 * src2, four bits of src1 per pass */ | ||
movi \dst, 0 | ||
1: add \tmp1, \src2, \dst /* candidate sum for bit 0: dst + src2 */ | ||
extui \tmp2, \src1, 0, 1 | ||
movnez \dst, \tmp1, \tmp2 /* keep the candidate only when the tested bit is set */ | ||
|
||
do_addx2 \tmp1, \src2, \dst, \tmp1 /* bit 1: dst + (src2 << 1) */ | ||
extui \tmp2, \src1, 1, 1 | ||
movnez \dst, \tmp1, \tmp2 | ||
|
||
do_addx4 \tmp1, \src2, \dst, \tmp1 /* bit 2: dst + (src2 << 2) */ | ||
extui \tmp2, \src1, 2, 1 | ||
movnez \dst, \tmp1, \tmp2 | ||
|
||
do_addx8 \tmp1, \src2, \dst, \tmp1 /* bit 3: dst + (src2 << 3) */ | ||
extui \tmp2, \src1, 3, 1 | ||
movnez \dst, \tmp1, \tmp2 | ||
|
||
srli \src1, \src1, 4 /* drop the four bits just handled */ | ||
slli \src2, \src2, 4 /* and scale src2 to match the next four */ | ||
bnez \src1, 1b /* stop as soon as no set bits remain in src1 */ | ||
.endm | ||
|
||
#ifdef __XTENSA_CALL0_ABI__ | ||
mul_mulsi3_body a12, a13, a14, a15, a8 | ||
#else | ||
/* The result will be written into a2, so save that argument in a4. */ | ||
mov a4, a2 | ||
mul_mulsi3_body a2, a4, a3, a5, a6 | ||
#endif | ||
abi_ret_default | ||
#endif /* XCHAL_NO_MUL */ | ||
|
||
ENDPROC(__umulsidi3) |