Skip to content

Commit

Permalink
Refs #173. Fixed an internal buffer overflow bug in gemv_t on x86.
Browse files Browse the repository at this point in the history
  • Loading branch information
xianyi committed Dec 23, 2012
1 parent a4ee6f3 commit fd3046b
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 37 deletions.
69 changes: 58 additions & 11 deletions kernel/x86/gemv_t_sse.S
Expand Up @@ -89,17 +89,23 @@
#endif

#define STACKSIZE 16

#define M 4 + STACKSIZE(%esp)
#define N 8 + STACKSIZE(%esp)
#define ALPHA 16 + STACKSIZE(%esp)
#define A 20 + STACKSIZE(%esp)
#define STACK_LDA 24 + STACKSIZE(%esp)
#define STACK_X 28 + STACKSIZE(%esp)
#define STACK_INCX 32 + STACKSIZE(%esp)
#define Y 36 + STACKSIZE(%esp)
#define STACK_INCY 40 + STACKSIZE(%esp)
#define BUFFER 44 + STACKSIZE(%esp)
/*
 * Stack frame layout after the prologue (subl $ARGS,%esp followed by the
 * register pushes that STACKSIZE accounts for):
 *
 *   0 .. STACKSIZE-1               saved callee registers
 *   STACKSIZE .. STACKSIZE+ARGS-1  local scratch slots (MMM, NN, AA, LDAX)
 *   STACKSIZE+ARGS                 return address
 *   STACKSIZE+ARGS+4 ..            incoming arguments
 */
#define ARGS 16

/* Incoming arguments; ARGS is added because the scratch area now sits
   between the saved registers and the return address. */
#define M 4 + STACKSIZE+ARGS(%esp)
#define N 8 + STACKSIZE+ARGS(%esp)
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
#define A 20 + STACKSIZE+ARGS(%esp)
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
#define STACK_X 28 + STACKSIZE+ARGS(%esp)
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
#define Y 36 + STACKSIZE+ARGS(%esp)
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
#define BUFFER 44 + STACKSIZE+ARGS(%esp)

/* Local scratch slots used by the chunking loop at .L0t:
   MMM  - M still to be processed (decremented by the chunk size each pass)
   NN   - backup of the original N
   AA   - base address of A for the current chunk (advanced at .L999)
   LDAX - backup of the original LDA */
#define MMM 0+STACKSIZE(%esp)
#define NN 4+STACKSIZE(%esp)
#define AA 8+STACKSIZE(%esp)
#define LDAX 12+STACKSIZE(%esp)

#define I %eax
#define J %ebx
Expand All @@ -114,6 +120,7 @@

PROLOGUE

subl $ARGS,%esp
pushl %ebp
pushl %edi
pushl %esi
Expand All @@ -122,6 +129,37 @@
PROFCODE

/* Save the original LDA, N, A and M in the local scratch slots so that they
   can be restored at the start of every chunk. */
movl STACK_LDA, LDA
movl LDA,LDAX # LDAX = original LDA
movl N,J
movl J,NN # NN = original N
movl A,J
movl J,AA # AA = original A (advanced per chunk at .L999)
movl M,J
movl J,MMM # MMM = M still to be processed
/* Top of the chunking loop: process M in pieces of at most J. */
.L0t:
xorl J,J
addl $1,J
sall $23,J # J = 1 << 23 (chunk size). NOTE(review): this comment used to say 2^22 and the sibling kernels shift by 22 - confirm the intended value
subl J,MMM # MMM -= J; the flags from this subtraction drive the jge below
movl J,M # M = full chunk (movl leaves the flags untouched)
jge .L00t
ALIGN_4

/* MMM went negative: fewer than J remain. */
movl MMM,%eax
addl J,%eax # %eax = amount that was left before the subtraction
jle .L999x # nothing left -> restore registers and return
movl %eax,M # M = final, partial chunk

/* Restore the per-call state that is reloaded from the scratch slots on every pass. */
.L00t:
movl AA,%eax
movl %eax,A # A = base of the current chunk

movl NN,%eax
movl %eax,N # N = original N


movl LDAX, LDA # LDA = original LDA

movl STACK_X, X
movl STACK_INCX, INCX
movl STACK_INCY, INCY
Expand Down Expand Up @@ -628,10 +666,19 @@
ALIGN_4

/* End of one chunk: advance the saved A base by M * SIZE bytes and loop back
   for the next chunk.
   NOTE(review): only AA is advanced here, while STACK_X is reloaded unchanged
   after .L00t - confirm that x is advanced for subsequent chunks elsewhere. */
.L999:
movl M,J
leal (,J,SIZE),%eax # %eax = M * SIZE
addl %eax,AA
jmp .L0t
ALIGN_4

/* Function exit: restore the saved registers, release the ARGS bytes of
   scratch reserved by "subl $ARGS,%esp" in the prologue, and return. */
.L999x:
popl %ebx
popl %esi
popl %edi
popl %ebp

addl $ARGS,%esp
ret

EPILOGUE
71 changes: 60 additions & 11 deletions kernel/x86/gemv_t_sse2.S
Expand Up @@ -76,18 +76,24 @@
#endif

/*
 * Stack frame layout after the prologue (subl $ARGS,%esp followed by four
 * pushl instructions):
 *
 *   0 .. STACKSIZE-1               saved %ebx, %esi, %edi, %ebp
 *   STACKSIZE .. STACKSIZE+ARGS-1  local scratch slots (MMM, AA, LDAX, NN)
 *   STACKSIZE+ARGS                 return address
 *   STACKSIZE+ARGS+4 ..            incoming arguments
 */
#define STACKSIZE 16
#define ARGS 16

/* Incoming arguments; ARGS is added because the scratch area sits between
   the saved registers and the return address. */
#define M 4 + STACKSIZE+ARGS(%esp)
#define N 8 + STACKSIZE+ARGS(%esp)
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
#define A 24 + STACKSIZE+ARGS(%esp)
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
#define Y 40 + STACKSIZE+ARGS(%esp)
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
#define BUFFER 48 + STACKSIZE+ARGS(%esp)

/* Local scratch slots used by the chunking loop at .L0t:
   MMM  - M still to be processed (decremented by the chunk size each pass)
   AA   - base address of A for the current chunk (advanced at .L999)
   LDAX - backup of the original LDA
   NN   - backup of the original N */
#define MMM 0+STACKSIZE(%esp)
#define AA 4+STACKSIZE(%esp)
#define LDAX 8+STACKSIZE(%esp)
#define NN 12+STACKSIZE(%esp)

#define M 4 + STACKSIZE(%esp)
#define N 8 + STACKSIZE(%esp)
#define ALPHA 16 + STACKSIZE(%esp)
#define A 24 + STACKSIZE(%esp)
#define STACK_LDA 28 + STACKSIZE(%esp)
#define STACK_X 32 + STACKSIZE(%esp)
#define STACK_INCX 36 + STACKSIZE(%esp)
#define Y 40 + STACKSIZE(%esp)
#define STACK_INCY 44 + STACKSIZE(%esp)
#define BUFFER 48 + STACKSIZE(%esp)

#define I %eax
#define J %ebx

Expand All @@ -101,14 +107,47 @@

PROLOGUE

subl $ARGS,%esp

pushl %ebp
pushl %edi
pushl %esi
pushl %ebx

PROFCODE


/* Save the original LDA, N, A and M in the local scratch slots so that they
   can be restored at the start of every chunk. */
movl STACK_LDA, LDA
movl LDA,LDAX # LDAX = original LDA
movl N,J
movl J,NN # NN = original N
movl A,J
movl J,AA # AA = original A (advanced per chunk at .L999)
movl M,J
movl J,MMM # MMM = M still to be processed
/* Top of the chunking loop: process M in pieces of at most J. */
.L0t:
xorl J,J
addl $1,J
sall $22,J # J = 1 << 22 (chunk size)
subl J,MMM # MMM -= J; the flags from this subtraction drive the jge below
movl J,M # M = full chunk (movl leaves the flags untouched)
jge .L00t
ALIGN_4

/* MMM went negative: fewer than J remain. */
movl MMM,%eax
addl J,%eax # %eax = amount that was left before the subtraction
jle .L999x # nothing left -> restore registers and return
movl %eax,M # M = final, partial chunk

/* Restore the per-call state that is reloaded from the scratch slots on every pass. */
.L00t:
movl AA,%eax
movl %eax,A # A = base of the current chunk

movl NN,%eax
movl %eax,N # N = original N


movl LDAX, LDA # LDA = original LDA
movl STACK_X, X
movl STACK_INCX, INCX
movl STACK_INCY, INCY
Expand All @@ -117,6 +156,7 @@
leal (,INCY, SIZE), INCY
leal (,LDA, SIZE), LDA


subl $-16 * SIZE, A

cmpl $0, N
Expand Down Expand Up @@ -560,10 +600,19 @@
ALIGN_4

/* End of one chunk: advance the saved A base by M * SIZE bytes and loop back
   for the next chunk.
   NOTE(review): only AA is advanced here, while STACK_X is reloaded unchanged
   after .L00t - confirm that x is advanced for subsequent chunks elsewhere. */
.L999:
movl M,J
leal (,J,SIZE),%eax # %eax = M * SIZE
addl %eax,AA
jmp .L0t
ALIGN_4

/* Function exit: pop the registers in the reverse order of the prologue
   pushes, release the ARGS bytes of scratch reserved by "subl $ARGS,%esp",
   and return. */
.L999x:
popl %ebx
popl %esi
popl %edi
popl %ebp

addl $ARGS,%esp
ret

EPILOGUE
61 changes: 46 additions & 15 deletions kernel/x86_64/sgemv_t.S
Expand Up @@ -47,7 +47,7 @@

#ifndef WINDOWS_ABI

#define STACKSIZE 64
#define STACKSIZE 128

#define OLD_M %rdi
#define OLD_N %rsi
Expand All @@ -57,6 +57,10 @@
#define STACK_Y 16 + STACKSIZE(%rsp)
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
/* Scratch slots inside the local frame, used by the chunking loop at .L0t:
   MMM  - M still to be processed (decremented by the chunk size each pass)
   NN   - backup of the original N
   AA   - base address of A for the current chunk (advanced at .L999)
   LDAX - backup of the original LDA */
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)

#else

Expand All @@ -71,6 +75,10 @@
#define STACK_Y 72 + STACKSIZE(%rsp)
#define STACK_INCY 80 + STACKSIZE(%rsp)
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
/* Scratch slots inside the local frame, used by the chunking loop at .L0t:
   MMM  - M still to be processed (decremented by the chunk size each pass)
   NN   - backup of the original N
   AA   - base address of A for the current chunk (advanced at .L999)
   LDAX - backup of the original LDA
   NOTE(review): the prologue saves %xmm15 with a 16-byte movups at 208(%rsp),
   i.e. bytes 208..223, so MMM at 216(%rsp) overlaps the upper half of that
   save slot. Consider moving these four slots to 224(%rsp) and above and
   growing STACKSIZE accordingly. */
#define MMM 216(%rsp)
#define NN 224(%rsp)
#define AA 232(%rsp)
#define LDAX 240(%rsp)

#endif

Expand Down Expand Up @@ -127,29 +135,46 @@
movups %xmm14, 192(%rsp)
movups %xmm15, 208(%rsp)

movq OLD_M, M
movq OLD_N, N
movq OLD_A, A
movq OLD_LDA, LDA
/* Back up the original M, N, A and LDA into the scratch slots. OLD_M and
   OLD_N arrive in registers; OLD_A and OLD_LDA are stack arguments under the
   Windows ABI, and x86 has no memory-to-memory mov, so they are staged
   through %rax (volatile, and not yet holding a live value here). */
movq OLD_M, MMM
movq OLD_N, NN
movq OLD_A, %rax
movq %rax, AA
movq OLD_LDA, %rax
movq %rax, LDAX
movq OLD_X, X
#else
movq OLD_M, M
movq OLD_N, N
movq OLD_A, A
movq OLD_LDA, LDA
movq OLD_M, MMM
movq OLD_N, NN
movq OLD_A, AA
movq OLD_LDA, LDAX
#endif

movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

#ifndef WINDOWS_ABI
pshufd $0, %xmm0, ALPHA
#else
pshufd $0, %xmm3, ALPHA
#endif


/* Top of the chunking loop: process M in pieces of at most 1 << 22. */
.L0t:
xorq M,M
addq $1,M
salq $22,M /* M = 1 << 22 (chunk size) */
subq M,MMM /* MMM -= chunk size; the flags drive the jge below */
jge .L00t
ALIGN_4

/* MMM went negative: less than a full chunk remains. */
movq MMM,%rax
addq M,%rax /* %rax = amount that was left before the subtraction */
jle .L999x /* nothing left -> restore registers and return */
movq %rax,M /* M = final, partial chunk */

/* Reload the per-call state on every pass: LDA, INCX and INCY are scaled by
   SIZE right after this block, so they have to start from the original
   values each time. */
.L00t:
movq LDAX,LDA
movq NN,N
movq AA,A
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (,LDA, SIZE), LDA
Expand Down Expand Up @@ -6341,6 +6366,12 @@
ALIGN_4

/* End of one chunk: advance the saved A base by M * SIZE bytes and loop back
   for the next chunk.
   NOTE(review): only AA is advanced here - confirm that the x pointer is
   advanced for subsequent chunks elsewhere. */
.L999:
leaq (,M,SIZE),%rax /* %rax = M * SIZE */
addq %rax,AA
jmp .L0t
ALIGN_4

.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
Expand Down

0 comments on commit fd3046b

Please sign in to comment.