/
KERNEL.POWER9
184 lines (167 loc) · 5.21 KB
/
KERNEL.POWER9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c
#ZGEMM_BETA = ../generic/zgemm_beta.c
STRMMKERNEL = sgemm_kernel_power9.S
DTRMMKERNEL = dgemm_kernel_power9.S
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
ZTRMMKERNEL = zgemm_kernel_power9.S
SGEMMKERNEL = sgemm_kernel_power9.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = sgemm_tcopy_16_power8.S
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_power9.S
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
DGEMMITCOPY = dgemm_tcopy_16_power8.S
DGEMMONCOPY = dgemm_ncopy_4_power8.S
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = cgemm_tcopy_8_power8.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
CGEMMINCOPYOBJ = cgemm_incopy.o
CGEMMITCOPYOBJ = cgemm_itcopy.o
ZGEMMKERNEL = zgemm_kernel_power9.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
ZGEMMINCOPYOBJ = zgemm_incopy.o
ZGEMMITCOPYOBJ = zgemm_itcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
#Pure C for other kernels
#SAMAXKERNEL = ../arm/amax.c
#DAMAXKERNEL = ../arm/amax.c
#CAMAXKERNEL = ../arm/zamax.c
#ZAMAXKERNEL = ../arm/zamax.c
#
#SAMINKERNEL = ../arm/amin.c
#DAMINKERNEL = ../arm/amin.c
#CAMINKERNEL = ../arm/zamin.c
#ZAMINKERNEL = ../arm/zamin.c
#
#SMAXKERNEL = ../arm/max.c
#DMAXKERNEL = ../arm/max.c
#
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
ISAMAXKERNEL = isamax.c
IDAMAXKERNEL = idamax.c
ICAMAXKERNEL = icamax.c
IZAMAXKERNEL = izamax.c
#
ISAMINKERNEL = isamin.c
IDAMINKERNEL = idamin.c
ICAMINKERNEL = icamin.c
IZAMINKERNEL = izamin.c
#
#ISMAXKERNEL = ../arm/imax.c
#IDMAXKERNEL = ../arm/imax.c
#
#ISMINKERNEL = ../arm/imin.c
#IDMINKERNEL = ../arm/imin.c
#
SASUMKERNEL = sasum.c
DASUMKERNEL = dasum.c
CASUMKERNEL = casum.c
ZASUMKERNEL = zasum.c
#
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
CAXPYKERNEL = caxpy.c
ZAXPYKERNEL = zaxpy.c
#
SCOPYKERNEL = scopy.c
DCOPYKERNEL = dcopy.c
CCOPYKERNEL = ccopy.c
ZCOPYKERNEL = zcopy.c
#
SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c
CDOTKERNEL = cdot.c
ZDOTKERNEL = zdot.c
#
SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c
#
SROTKERNEL = srot.c
DROTKERNEL = drot.c
CROTKERNEL = crot.c
ZROTKERNEL = zrot.c
#
SSCALKERNEL = sscal.c
DSCALKERNEL = dscal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
#
SSWAPKERNEL = sswap.c
DSWAPKERNEL = dswap.c
CSWAPKERNEL = cswap.c
ZSWAPKERNEL = zswap.c
#
SGEMVNKERNEL = sgemv_n.c
DGEMVNKERNEL = dgemv_n.c
CGEMVNKERNEL = cgemv_n.c
ZGEMVNKERNEL = zgemv_n_4.c
#
SGEMVTKERNEL = sgemv_t.c
DGEMVTKERNEL = dgemv_t.c
CGEMVTKERNEL = cgemv_t.c
ZGEMVTKERNEL = zgemv_t_4.c
#SSYMV_U_KERNEL = ../generic/symv_k.c
#SSYMV_L_KERNEL = ../generic/symv_k.c
#DSYMV_U_KERNEL = ../generic/symv_k.c
#DSYMV_L_KERNEL = ../generic/symv_k.c
#QSYMV_U_KERNEL = ../generic/symv_k.c
#QSYMV_L_KERNEL = ../generic/symv_k.c
#CSYMV_U_KERNEL = ../generic/zsymv_k.c
#CSYMV_L_KERNEL = ../generic/zsymv_k.c
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c
#XSYMV_U_KERNEL = ../generic/zsymv_k.c
#XSYMV_L_KERNEL = ../generic/zsymv_k.c
#ZHEMV_U_KERNEL = ../generic/zhemv_k.c
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c
#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c