Permalink
Browse files

Dcopy added

  • Loading branch information...
1 parent 4c43e18 commit 01244730d9327f4f0076f56a55bbcc9cc613690d @ziutek committed Nov 13, 2011
Showing with 99 additions and 0 deletions.
  1. +2 −0 Makefile
  2. +28 −0 d_test.go
  3. +15 −0 dcopy.go
  4. +51 −0 dcopy_amd64.s
  5. +3 −0 dcopy_decl.go
View
@@ -16,6 +16,7 @@ OFILES_amd64=\
idamax_amd64.$O\
sswap_amd64.$O\
dswap_amd64.$O\
+ dcopy_amd64.$O\
OFILES=\
$(OFILES_$(GOARCH))
@@ -33,6 +34,7 @@ ALLGOFILES=\
idamax.go\
sswap.go\
dswap.go\
+ dcopy.go\
NOGOFILES=\
$(subst _$(GOARCH).$O,.go,$(OFILES_$(GOARCH)))
View
@@ -110,6 +110,26 @@ func TestDswap(t *testing.T) {
}
}
+func TestDcopy(t *testing.T) { // exercises Dcopy with equal strides 1..8 and N from 0 to len(xd)/inc
+ for inc := 1; inc < 9; inc++ {
+ for N := 0; N <= len(xd)/inc; N++ {
+ a := make([]float64, len(xd)) // fresh zero-filled destination for every (inc, N) case
+ Dcopy(N, xd, inc, a, inc)
+ for i := 0; i < inc * N; i++ { // NOTE(review): indices >= inc*N are never inspected, so writes past the last element go unnoticed
+ if i % inc == 0 { // index lies on the stride: must hold the value copied from xd
+ if a[i] != xd[i] {
+ t.Fatalf("inc=%d N=%d i=%d r=%f e=%f", inc, N, i, a[i], xd[i])
+ }
+ } else { // index lies between strides: must still be the initial zero
+ if a[i] != 0 {
+ t.Fatalf("inc=%d N=%d i=%d r=%f e=0", inc, N, i, a[i])
+ }
+ }
+ }
+ }
+ }
+}
+
var vd, wd []float64
func init() {
@@ -154,3 +174,11 @@ func BenchmarkDswap(b *testing.B) {
Dswap(len(x), x, 1, y, 1)
}
}
+func BenchmarkDcopy(b *testing.B) { // times a unit-stride Dcopy of all len(vd) elements of vd into y
+ b.StopTimer() // keep the allocation of y out of the measured time
+ y := make([]float64, len(vd))
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ Dcopy(len(vd), vd, 1, y, 1)
+ }
+}
View
@@ -0,0 +1,15 @@
+package blas
+
+// Dcopy copies N elements of X, read with stride incX, into Y, written with stride incY.
+func Dcopy(N int, X []float64, incX int, Y []float64, incY int) {
+ if incX == 1 && incY == 1 { // contiguous case: hand the work to the built-in copy
+ copy(Y[:N], X[:N]) // the slice expressions panic when N is out of range (including N < 0)
+ return
+ }
+ var xi, yi int // current read index into X and write index into Y, both starting at 0
+ for ; N > 0; N-- { // N <= 0 copies nothing on this path, unlike the contiguous case above
+ Y[yi] = X[xi]
+ xi += incX
+ yi += incY
+ }
+}
View
@@ -0,0 +1,51 @@
+// func Dcopy(N int, X []float64, incX int, Y []float64, incY int)
+TEXT ·Dcopy(SB), 7, $0
+ MOVL N+0(FP), CX // CX = N (32-bit load)
+ MOVQ X_data+8(FP), SI // SI = X data pointer (source cursor)
+ MOVL incX+24(FP), AX // AX = incX (32-bit load)
+ MOVQ Y_data+32(FP), DI // DI = Y data pointer (destination cursor)
+ MOVL incY+48(FP), BX // BX = incY (32-bit load)
+
+ // Check data boundaries: the last index touched must be below the slice length
+ MOVL CX, BP
+ DECL BP
+ MOVL BP, DX
+ IMULL AX, BP // BP = incX * (N - 1)
+ IMULL BX, DX // DX = incY * (N - 1)
+ CMPL BP, X_len+16(FP)
+ JGE panic // signed compare: incX * (N - 1) >= len(X)
+ CMPL DX, Y_len+40(FP)
+ JGE panic // signed compare: incY * (N - 1) >= len(Y)
+
+ // Check if incX != 1 or incY != 1
+ CMPQ AX, $1
+ JNE with_stride
+ CMPQ BX, $1
+ JNE with_stride
+
+ // Optimized copy for incX == incY == 1
+ REP; MOVSQ // copy CX quadwords from (SI) to (DI); assumes the direction flag is clear — TODO confirm
+ RET
+
+with_stride:
+ // Setup strides
+ SALQ $3, AX // AX = sizeof(float64) * incX
+ SALQ $3, BX // BX = sizeof(float64) * incY
+
+ CMPQ CX, $0 // only N == 0 skips the loop; NOTE(review): a negative N appears to pass the signed bounds check above and arrive here as a huge 64-bit count — confirm
+ JE end
+
+ loop:
+ MOVQ (SI), DX // load one element from X
+ MOVQ DX, (DI) // store it into Y
+ ADDQ AX, SI // advance the source by incX elements
+ ADDQ BX, DI // advance the destination by incY elements
+ DECQ CX
+ JNE loop // repeat until the remaining count reaches zero
+
+end:
+ RET
+
+panic:
+ CALL runtime·panicindex(SB) // a bounds check failed: call the runtime's panicindex
+ RET
View
@@ -0,0 +1,3 @@
+package blas
+
+func Dcopy(N int, X []float64, incX int, Y []float64, incY int) // bodyless declaration; the body is the ·Dcopy assembly routine added in this commit

0 comments on commit 0124473

Please sign in to comment.