Skip to content

Commit

Permalink
Define alternate kernels for big-endian PPC970
Browse files Browse the repository at this point in the history
The altivec versions of SGEMM and CGEMM fail most test in LAPACK-TESTING when compiled for big endian, STRSM/CTRSM even cause segfaults. The rot kernels either fail the corresponding utest or lead to failures in LAPACK-TESTING.
  • Loading branch information
martin-frbg committed Nov 17, 2019
1 parent 6082e55 commit b3ac6ee
Showing 1 changed file with 45 additions and 10 deletions.
55 changes: 45 additions & 10 deletions kernel/power/KERNEL.PPC970
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
SGEMMKERNEL = gemm_kernel.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
else
SGEMMKERNEL = gemm_kernel_altivec.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
Expand All @@ -7,6 +18,8 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif

DGEMMKERNEL = gemm_kernel.S
DGEMMINCOPY =
DGEMMITCOPY =
Expand All @@ -16,6 +29,18 @@ DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
CGEMMKERNEL = zgemm_kernel.S
CGEMMINCOPY =
CGEMMITCOPY =
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ =
CGEMMITCOPYOBJ =
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
else
CGEMMKERNEL = zgemm_kernel_altivec.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
Expand All @@ -25,6 +50,8 @@ CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif

ZGEMMKERNEL = zgemm_kernel.S
ZGEMMINCOPY =
ZGEMMITCOPY =
Expand All @@ -35,22 +62,30 @@ ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

#STRSMKERNEL_LN = trsm_kernel_LN.S
#STRSMKERNEL_LT = trsm_kernel_LT.S
#STRSMKERNEL_RN = trsm_kernel_LT.S
#STRSMKERNEL_RT = trsm_kernel_RT.S

DTRSMKERNEL_LN = trsm_kernel_LN.S
DTRSMKERNEL_LT = trsm_kernel_LT.S
DTRSMKERNEL_RN = trsm_kernel_LT.S
DTRSMKERNEL_RT = trsm_kernel_RT.S

#CTRSMKERNEL_LN = ztrsm_kernel_LN.S
#CTRSMKERNEL_LT = ztrsm_kernel_LT.S
#CTRSMKERNEL_RN = ztrsm_kernel_LT.S
#CTRSMKERNEL_RT = ztrsm_kernel_RT.S

ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S

ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S
STRSMKERNEL_RN = trsm_kernel_LT.S
STRSMKERNEL_RT = trsm_kernel_RT.S

CTRSMKERNEL_LN = ztrsm_kernel_LN.S
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
CTRSMKERNEL_RT = ztrsm_kernel_RT.S


SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c
endif

0 comments on commit b3ac6ee

Please sign in to comment.