Skip to content

Commit

Permalink
Some cleanups in *axpy assembler code
Browse files Browse the repository at this point in the history
  • Loading branch information
michalderkacz committed Apr 3, 2012
1 parent 519970a commit 2f98de8
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 22 deletions.
28 changes: 17 additions & 11 deletions daxpy_amd64.s
Expand Up @@ -98,7 +98,7 @@ TEXT ·Daxpy(SB), 7, $0


SUBQ $4, BP SUBQ $4, BP
JGE full_simd_loop_sum // There are 4 or more pairs to process JGE full_simd_loop_sum // There are 4 or more pairs to process
JMP rest JMP rest_sum


full_simd_loop_diff: full_simd_loop_diff:
// Load first two pairs // Load first two pairs
Expand All @@ -120,7 +120,7 @@ TEXT ·Daxpy(SB), 7, $0


SUBQ $4, BP SUBQ $4, BP
JGE full_simd_loop_diff // There are 4 or more pairs to process JGE full_simd_loop_diff // There are 4 or more pairs to process
JMP rest JMP rest_diff


with_stride: with_stride:
// Setup long strides // Setup long strides
Expand Down Expand Up @@ -200,7 +200,7 @@ with_stride:


SUBQ $4, BP SUBQ $4, BP
JGE half_simd_loop // There are 4 or more pairs to process JGE half_simd_loop // There are 4 or more pairs to process
JMP rest JMP rest_sum


half_simd_loop_diff: half_simd_loop_diff:
// Load first two pairs // Load first two pairs
Expand Down Expand Up @@ -233,19 +233,13 @@ with_stride:


SUBQ $4, BP SUBQ $4, BP
JGE half_simd_loop // There are 4 or more pairs to process JGE half_simd_loop // There are 4 or more pairs to process
JMP rest_diff


rest: rest:
// Undo last SUBQ // Undo last SUBQ
ADDQ $4, BP ADDQ $4, BP

// Check whether there are any values left to process // Check whether there are any values left to process
JE end JE end

UCOMISD X0, X1
JE loop_sum // alpha == 1
UCOMISD X0, X7
JE loop_diff // alpha == -1

loop: loop:
// Load from X and scale // Load from X and scale
MOVSD (SI), X2 MOVSD (SI), X2
Expand All @@ -260,8 +254,13 @@ rest:


DECQ BP DECQ BP
JNE loop JNE loop
end:
RET RET

rest_sum:
// Undo last SUBQ
ADDQ $4, BP
// Check whether there are any values left to process
JE end
loop_sum: loop_sum:
// Load from X // Load from X
MOVSD (SI), X2 MOVSD (SI), X2
Expand All @@ -276,6 +275,12 @@ end:
DECQ BP DECQ BP
JNE loop_sum JNE loop_sum
RET RET

rest_diff:
// Undo last SUBQ
ADDQ $4, BP
// Check whether there are any values left to process
JE end
loop_diff: loop_diff:
// Load from Y // Load from Y
MOVSD (DI), X2 MOVSD (DI), X2
Expand All @@ -293,4 +298,5 @@ end:


panic: panic:
CALL runtime·panicindex(SB) CALL runtime·panicindex(SB)
end:
RET RET
28 changes: 17 additions & 11 deletions saxpy_amd64.s
Expand Up @@ -85,7 +85,7 @@ TEXT ·Saxpy(SB), 7, $0


SUBQ $4, BP SUBQ $4, BP
JGE full_simd_loop_sum // There are 4 or more pairs to process JGE full_simd_loop_sum // There are 4 or more pairs to process
JMP rest JMP rest_sum


full_simd_loop_diff: full_simd_loop_diff:
// Load four pairs // Load four pairs
Expand All @@ -101,7 +101,7 @@ TEXT ·Saxpy(SB), 7, $0


SUBQ $4, BP SUBQ $4, BP
JGE full_simd_loop_diff // There are 4 or more pairs to process JGE full_simd_loop_diff // There are 4 or more pairs to process
JMP rest JMP rest_diff


with_stride: with_stride:
// Setup long strides // Setup long strides
Expand Down Expand Up @@ -194,7 +194,7 @@ with_stride:


SUBQ $4, BP SUBQ $4, BP
JGE half_simd_loop_sum // There are 4 or more pairs to process JGE half_simd_loop_sum // There are 4 or more pairs to process
JMP rest JMP rest_sum


half_simd_loop_diff: half_simd_loop_diff:
MOVSS (DI), X2 MOVSS (DI), X2
Expand All @@ -221,19 +221,13 @@ with_stride:


SUBQ $4, BP SUBQ $4, BP
JGE half_simd_loop_diff // There are 4 or more pairs to process JGE half_simd_loop_diff // There are 4 or more pairs to process
JMP rest_diff


rest: rest:
// Undo last SUBQ // Undo last SUBQ
ADDQ $4, BP ADDQ $4, BP

// Check whether there are any values left to process // Check whether there are any values left to process
JE end JE end

UCOMISS X0, X1
JE loop_sum // alpha == 1
UCOMISS X0, X8
JE loop_diff // alpha == -1

loop: loop:
// Load from X and scale // Load from X and scale
MOVSS (SI), X2 MOVSS (SI), X2
Expand All @@ -248,8 +242,13 @@ rest:


DECQ BP DECQ BP
JNE loop JNE loop
end:
RET RET

rest_sum:
// Undo last SUBQ
ADDQ $4, BP
// Check whether there are any values left to process
JE end
loop_sum: loop_sum:
// Load from X // Load from X
MOVSS (SI), X2 MOVSS (SI), X2
Expand All @@ -264,6 +263,12 @@ end:
DECQ BP DECQ BP
JNE loop_sum JNE loop_sum
RET RET

rest_diff:
// Undo last SUBQ
ADDQ $4, BP
// Check whether there are any values left to process
JE end
loop_diff: loop_diff:
// Load from Y // Load from Y
MOVSS (DI), X2 MOVSS (DI), X2
Expand All @@ -281,4 +286,5 @@ end:


panic: panic:
CALL runtime·panicindex(SB) CALL runtime·panicindex(SB)
end:
RET RET

0 comments on commit 2f98de8

Please sign in to comment.