Permalink
Browse files

Scopy added

  • Loading branch information...
1 parent 0124473 commit 9643be17105ca2c0daa439d0c4fe4f12a7e942c7 @ziutek committed Nov 13, 2011
Showing with 102 additions and 0 deletions.
  1. +2 −0 Makefile
  2. +1 −0 d_test.go
  3. +30 −0 s_test.go
  4. +15 −0 scopy.go
  5. +51 −0 scopy_amd64.s
  6. +3 −0 scopy_decl.go
View
@@ -16,6 +16,7 @@ OFILES_amd64=\
idamax_amd64.$O\
sswap_amd64.$O\
dswap_amd64.$O\
+ scopy_amd64.$O\
dcopy_amd64.$O\
OFILES=\
@@ -34,6 +35,7 @@ ALLGOFILES=\
idamax.go\
sswap.go\
dswap.go\
+ scopy.go\
dcopy.go\
NOGOFILES=\
View
@@ -174,6 +174,7 @@ func BenchmarkDswap(b *testing.B) {
Dswap(len(x), x, 1, y, 1)
}
}
+
func BenchmarkDcopy(b *testing.B) {
b.StopTimer()
y := make([]float64, len(vd))
View
@@ -118,6 +118,27 @@ func TestSswap(t *testing.T) {
}
}
+// TestScopy exercises Scopy with equal source and destination strides from 1
+// to 8 and every element count N that fits in xf. After each call, positions
+// that are multiples of the stride must hold the value copied from xf and all
+// positions in between must still be zero.
+func TestScopy(t *testing.T) {
+ for inc := 1; inc < 9; inc++ {
+ for N := 0; N <= len(xf)/inc; N++ {
+ a := make([]float32, len(xf))
+ Scopy(N, xf, inc, a, inc)
+ for i := 0; i < inc*N; i++ {
+ if i%inc == 0 {
+ if a[i] != xf[i] {
+ // Report the float32 source value that was actually compared.
+ t.Fatalf("inc=%d N=%d i=%d r=%f e=%f", inc, N, i, a[i], xf[i])
+ }
+ } else {
+ if a[i] != 0 {
+ t.Fatalf("inc=%d N=%d i=%d r=%f e=0", inc, N, i, a[i])
+ }
+ }
+ }
+ }
+ }
+}
+
+
var vf, wf []float32
func init() {
@@ -168,3 +189,12 @@ func BenchmarkSswap(b *testing.B) {
Sswap(len(x), x, 1, y, 1)
}
}
+
+// BenchmarkScopy times a unit-stride Scopy of the whole vf vector into a
+// destination buffer that is allocated while the timer is stopped.
+func BenchmarkScopy(b *testing.B) {
+ b.StopTimer()
+ n := len(vf)
+ dst := make([]float32, n)
+ b.StartTimer()
+ for iter := b.N; iter > 0; iter-- {
+ Scopy(n, vf, 1, dst, 1)
+ }
+}
View
@@ -0,0 +1,15 @@
+package blas
+
+// Scopy copies N elements from X into Y, reading X at indexes 0, incX, 2*incX, ... and writing Y at indexes 0, incY, 2*incY, ...
+func Scopy(N int, X []float32, incX int, Y []float32, incY int) {
+ if incX == 1 && incY == 1 {
+ copy(Y[:N], X[:N]) // unit strides: one contiguous copy; the slice expressions panic if N is negative or exceeds a slice's capacity
+ return
+ }
+ var xi, yi int // current indexes into X and Y, both starting at 0
+ for ; N > 0; N-- { // executes N times; NOTE(review): a negative N is a silent no-op here but panics on the unit-stride path above
+ Y[yi] = X[xi]
+ xi += incX
+ yi += incY
+ }
+}
View
@@ -0,0 +1,51 @@
+// func Scopy(N int, X []float32, incX int, Y []float32, incY int)
+TEXT ·Scopy(SB), 7, $0
+ MOVL N+0(FP), CX // CX = N (32-bit load)
+ MOVQ X_data+8(FP), SI // SI = address of X[0]
+ MOVL incX+24(FP), AX // AX = incX (32-bit load)
+ MOVQ Y_data+32(FP), DI // DI = address of Y[0]
+ MOVL incY+48(FP), BX // BX = incY (32-bit load)
+
+ // Check data boundaries: the last index touched, inc * (N - 1), must be below the slice length. NOTE(review): the compares are signed and only test the upper bound, so a negative product passes — confirm callers never pass a negative N or increment.
+ MOVL CX, BP
+ DECL BP // BP = N - 1
+ MOVL BP, DX // DX = N - 1
+ IMULL AX, BP // BP = incX * (N - 1)
+ IMULL BX, DX // DX = incY * (N - 1)
+ CMPL BP, X_len+16(FP)
+ JGE panic // last source index >= len(X)
+ CMPL DX, Y_len+40(FP)
+ JGE panic // last destination index >= len(Y)
+
+ // Check if incX != 1 or incY != 1 (64-bit compares on registers filled by the 32-bit loads above)
+ CMPQ AX, $1
+ JNE with_stride
+ CMPQ BX, $1
+ JNE with_stride
+
+ // Optimized copy for incX == incY == 1
+ REP; MOVSL // copy CX 32-bit words from (SI) to (DI)
+ RET
+
+with_stride:
+ // Setup strides: convert element increments into byte offsets
+ SALQ $2, AX // AX = sizeof(float32) * incX
+ SALQ $2, BX // BX = sizeof(float32) * incY
+
+ CMPQ CX, $0
+ JE end // nothing to copy when N == 0
+
+ loop:
+ MOVL (SI), DX // DX = current source element (DX is reused as scratch here)
+ MOVL DX, (DI) // store it at the current destination position
+ ADDQ AX, SI // advance the source pointer by incX elements
+ ADDQ BX, DI // advance the destination pointer by incY elements
+ DECQ CX // one fewer element left
+ JNE loop // repeat until the counter reaches zero
+
+end:
+ RET
+
+panic:
+ CALL runtime·panicindex(SB) // bounds check failed: hand off to the runtime's panicindex routine
+ RET
View
@@ -0,0 +1,3 @@
+package blas
+
+func Scopy(N int, X []float32, incX int, Y []float32, incY int) // bodyless declaration; the body is the ·Scopy TEXT symbol in scopy_amd64.s

0 comments on commit 9643be1

Please sign in to comment.