WIP: switch writeBlocks to take a slice pointer
diff --git a/xxhash.go b/xxhash.go index 7ab754e..309717f 100644 --- a/xxhash.go +++ b/xxhash.go
@@ -133,7 +133,7 @@ if len(b) >= 32 { // One or more full blocks left. - b = writeBlocks(x, b) + writeBlocks(x, &b) } // Store any remaining partial block. @@ -143,8 +143,9 @@ return } -func writeBlocksGo(x *xxh, b []byte) []byte { +func writeBlocksGo(x *xxh, bp *[]byte) { v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 + b := *bp for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) @@ -153,7 +154,7 @@ b = b[32:len(b):len(b)] } x.v1, x.v2, x.v3, x.v4 = v1, v2, v3, v4 - return b + *bp = b } func (x *xxh) Sum(b []byte) []byte {
diff --git a/xxhash_amd64.go b/xxhash_amd64.go index e294303..e1335d9 100644 --- a/xxhash_amd64.go +++ b/xxhash_amd64.go
@@ -6,4 +6,4 @@ func sum64(b []byte) uint64 -func writeBlocks(x *xxh, b []byte) []byte +func writeBlocks(x *xxh, bp *[]byte)
diff --git a/xxhash_amd64.s b/xxhash_amd64.s index c49483d..54043eb 100644 --- a/xxhash_amd64.s +++ b/xxhash_amd64.s
@@ -170,17 +170,18 @@ RET // writeBlocks uses the same registers as above except that it uses AX to store -// the x pointer. +// the x pointer and R15 to store the bp pointer. -// func writeBlocks(x *xxh, b []byte) []byte -TEXT ·writeBlocks(SB), NOSPLIT, $0-56 +// func writeBlocks(x *xxh, bp *[]byte) +TEXT ·writeBlocks(SB), NOSPLIT, $0-16 // Load fixed primes needed for round. MOVQ ·prime1(SB), R13 MOVQ ·prime2(SB), R14 // Load slice. - MOVQ b_base+8(FP), CX - MOVQ b_len+16(FP), DX + MOVQ bp+8(FP), R15 + MOVQ (R15), CX // base + MOVQ 8(R15), DX // len LEAQ (CX)(DX*1), BX SUBQ $32, BX @@ -208,15 +209,15 @@ MOVQ R10, 16(AX) MOVQ R11, 24(AX) - // Construct return slice. - MOVQ CX, ret+32(FP) + // Write result slice. + MOVQ CX, (R15) // New length is 32 - (CX - BX) -> BX+32 - CX. ADDQ $32, BX SUBQ CX, BX - MOVQ BX, ret+40(FP) + MOVQ BX, 8(R15) // Set the cap same as length. - MOVQ BX, ret+48(FP) + MOVQ BX, 16(R15) RET
diff --git a/xxhash_amd64_test.go b/xxhash_amd64_test.go index 330ce96..a9f1495 100644 --- a/xxhash_amd64_test.go +++ b/xxhash_amd64_test.go
@@ -27,17 +27,18 @@ x0 := New().(*xxh) x1 := New().(*xxh) for i := 32; i < 500; i++ { - b := make([]byte, i) - for j := range b { - b[j] = byte(j) + b0 := make([]byte, i) + for j := range b0 { + b0[j] = byte(j) } - pureGo := writeBlocksGo(x0, b) - asm := writeBlocks(x1, b) - if !reflect.DeepEqual(pureGo, asm) { - t.Fatalf("[i=%d] pure go gave %v; asm gave %v", i, pureGo, asm) + b1 := b0 + writeBlocksGo(x0, &b0) + writeBlocks(x1, &b1) + if !reflect.DeepEqual(b0, b1) { + t.Fatalf("[i=%d] pure go gave %v; b1 gave %v", i, b0, b1) } if !reflect.DeepEqual(x0, x1) { - t.Fatalf("[i=%d] pure go had state %v; asm had state %v", i, x0, x1) + t.Fatalf("[i=%d] pure go had state %v; b1 had state %v", i, x0, x1) } } }