Skip to content

Commit

Permalink
apacheGH-35337: [Go] ASAN tests fail with Go1.20+
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroshade committed Apr 25, 2023
1 parent a1403d4 commit 853750a
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 76 deletions.
2 changes: 1 addition & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

module github.com/apache/arrow/go/v12

go 1.18
go 1.20

require (
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c
Expand Down
90 changes: 90 additions & 0 deletions go/internal/hashing/hash_funcs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hashing

import (
"math/bits"
"unsafe"

"github.com/zeebo/xxh3"
)

// hashInt mixes a 64-bit integer into a hash value.
//
// alg selects which of two multipliers is used; it indexes a two-element
// array, so any value other than 0 or 1 panics with an index-out-of-range.
func hashInt(val uint64, alg uint64) uint64 {
	// Both multipliers are prime constants taken from xxhash, picked for how
	// well they spread bits.
	primes := [2]uint64{11400714785074694791, 14029467366897019727}

	// The (wrapping) multiplication pushes information from the low bits up
	// into the high bits.
	mixed := primes[alg] * val

	// Swapping the byte order brings those well-mixed high bits back down so
	// that they take part in the initial hash table index.
	return bits.ReverseBytes64(mixed)
}

// hashFloat32 hashes the raw 32-bit pattern of val.
//
// The same 32-bit word is run through hashInt twice, once with alg and once
// with alg^1, and the two results are XORed together with the byte width (4).
// alg is passed straight to hashInt, which only accepts 0 or 1.
func hashFloat32(val float32, alg uint64) uint64 {
	// Reinterpret the float's own storage as a uint32. Reading through &val
	// keeps the float's natural alignment; going through an intermediate
	// [4]byte copy (alignment 1) would make the 32-bit load potentially
	// misaligned.
	x := uint64(*(*uint32)(unsafe.Pointer(&val)))
	hx := hashInt(x, alg)
	hy := hashInt(x, alg^1)
	return 4 ^ hx ^ hy
}

// hashFloat64 hashes the raw 64-bit pattern of val.
//
// The value's storage is viewed as two consecutive 32-bit words. The second
// word in memory is hashed with alg, the first with alg^1, and the results
// are XORed together with the byte width (8). alg is passed straight to
// hashInt, which only accepts 0 or 1.
func hashFloat64(val float64, alg uint64) uint64 {
	// Reinterpret the float's own storage instead of copying it into a
	// byte array first: a [8]byte only guarantees 1-byte alignment, which
	// would make the 32-bit loads potentially misaligned. words[0] and
	// words[1] are the words at byte offsets 0 and 4 respectively.
	words := *(*[2]uint32)(unsafe.Pointer(&val))
	hx := hashInt(uint64(words[1]), alg)
	hy := hashInt(uint64(words[0]), alg^1)
	return 8 ^ hx ^ hy
}

// exprimes holds one additive constant per hash algorithm index; hash adds
// the selected entry to the xxh3 result to slightly improve hash quality.
var exprimes = [...]uint64{
	1609587929392839161,
	9650029242287828579,
}

// for smaller amounts of bytes this is faster than even calling into
// xxh3 to do the hash, so we specialize in order to get the benefits
// of that performance.
func hash(b []byte, alg uint64) uint64 {
n := uint32(len(b))
if n <= 16 {
switch {
case n > 8:
// 8 < length <= 16
// apply same principle as above, but as two 64-bit ints
x := *(*uint64)(unsafe.Pointer(&b[n-8]))
y := *(*uint64)(unsafe.Pointer(&b[0]))
hx := hashInt(x, alg)
hy := hashInt(y, alg^1)
return uint64(n) ^ hx ^ hy
case n >= 4:
// 4 < length <= 8
// we can read the bytes as two overlapping 32-bit ints, apply different
// hash functions to each in parallel
// then xor the results
x := *(*uint32)(unsafe.Pointer(&b[n-4]))
y := *(*uint32)(unsafe.Pointer(&b[0]))
hx := hashInt(uint64(x), alg)
hy := hashInt(uint64(y), alg^1)
return uint64(n) ^ hx ^ hy
case n > 0:
x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1]))
return hashInt(uint64(x), alg)
case n == 0:
return 1
}
}

// increase differentiation enough to improve hash quality
return xxh3.Hash(b) + exprimes[alg]
}
30 changes: 30 additions & 0 deletions go/internal/hashing/hash_string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !go1.20

package hashing

import (
"reflect"
"unsafe"
)

// hashString hashes the bytes of val with hash, without copying the string.
//
// alg is forwarded unchanged to hash.
func hashString(val string, alg uint64) uint64 {
	// Build a []byte that aliases the string's bytes by filling in the header
	// of a real slice variable field by field. Casting &val straight to
	// *[]byte would load a three-word slice header from a two-word string
	// header, i.e. read one word past the end of val.
	var buf []byte
	src := (*reflect.StringHeader)(unsafe.Pointer(&val))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
	dst.Data = src.Data
	dst.Len = src.Len
	dst.Cap = src.Len
	return hash(buf, alg)
}
26 changes: 26 additions & 0 deletions go/internal/hashing/hash_string_go1.20.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build go1.20

package hashing

import "unsafe"

// hashString hashes the bytes of val with hash, without copying the string:
// the byte slice handed to hash aliases the string's own storage.
//
// alg is forwarded unchanged to hash.
func hashString(val string, alg uint64) uint64 {
	return hash(unsafe.Slice(unsafe.StringData(val), len(val)), alg)
}
75 changes: 0 additions & 75 deletions go/internal/hashing/xxh3_memo_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,10 @@ package hashing
import (
"bytes"
"math"
"math/bits"
"reflect"
"unsafe"

"github.com/apache/arrow/go/v12/parquet"

"github.com/zeebo/xxh3"
)

//go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=types.tmpldata xxh3_memo_table.gen.go.tmpl
Expand Down Expand Up @@ -76,78 +73,6 @@ type NumericMemoTable interface {
WriteOutSubsetLE(offset int, out []byte)
}

// hashInt mixes a 64-bit integer into a hash value.
//
// alg selects which of two multipliers is used; it indexes a two-element
// array, so any value other than 0 or 1 panics with an index-out-of-range.
func hashInt(val uint64, alg uint64) uint64 {
	// Both multipliers are prime constants taken from xxhash, picked for how
	// well they spread bits.
	primes := [2]uint64{11400714785074694791, 14029467366897019727}

	// The (wrapping) multiplication pushes information from the low bits up
	// into the high bits.
	mixed := primes[alg] * val

	// Swapping the byte order brings those well-mixed high bits back down so
	// that they take part in the initial hash table index.
	return bits.ReverseBytes64(mixed)
}

// hashFloat32 hashes the raw 32-bit pattern of val.
//
// The same 32-bit word is run through hashInt twice, once with alg and once
// with alg^1, and the two results are XORed together with the byte width (4).
// alg is passed straight to hashInt, which only accepts 0 or 1.
func hashFloat32(val float32, alg uint64) uint64 {
	// Reinterpret the float's own storage as a uint32. Reading through &val
	// keeps the float's natural alignment; going through an intermediate
	// [4]byte copy (alignment 1) would make the 32-bit load potentially
	// misaligned.
	x := uint64(*(*uint32)(unsafe.Pointer(&val)))
	hx := hashInt(x, alg)
	hy := hashInt(x, alg^1)
	return 4 ^ hx ^ hy
}

// hashFloat64 hashes the raw 64-bit pattern of val.
//
// The value's storage is viewed as two consecutive 32-bit words. The second
// word in memory is hashed with alg, the first with alg^1, and the results
// are XORed together with the byte width (8). alg is passed straight to
// hashInt, which only accepts 0 or 1.
func hashFloat64(val float64, alg uint64) uint64 {
	// Reinterpret the float's own storage instead of copying it into a
	// byte array first: a [8]byte only guarantees 1-byte alignment, which
	// would make the 32-bit loads potentially misaligned. words[0] and
	// words[1] are the words at byte offsets 0 and 4 respectively.
	words := *(*[2]uint32)(unsafe.Pointer(&val))
	hx := hashInt(uint64(words[1]), alg)
	hy := hashInt(uint64(words[0]), alg^1)
	return 8 ^ hx ^ hy
}

// hashString hashes the bytes of val with hash, without copying the string.
//
// alg is forwarded unchanged to hash.
func hashString(val string, alg uint64) uint64 {
	// Build a []byte that aliases the string's bytes by filling in the header
	// of a real slice variable field by field. Casting &val straight to
	// *[]byte would load a three-word slice header from a two-word string
	// header, i.e. read one word past the end of val.
	var buf []byte
	src := (*reflect.StringHeader)(unsafe.Pointer(&val))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
	dst.Data = src.Data
	dst.Len = src.Len
	dst.Cap = src.Len
	return hash(buf, alg)
}

// exprimes holds one additive constant per hash algorithm index; hash adds
// the selected entry to the xxh3 result to slightly improve hash quality.
var exprimes = [...]uint64{
	1609587929392839161,
	9650029242287828579,
}

// for smaller amounts of bytes this is faster than even calling into
// xxh3 to do the hash, so we specialize in order to get the benefits
// of that performance.
func hash(b []byte, alg uint64) uint64 {
n := uint32(len(b))
if n <= 16 {
switch {
case n > 8:
// 8 < length <= 16
// apply same principle as above, but as two 64-bit ints
x := *(*uint64)(unsafe.Pointer(&b[n-8]))
y := *(*uint64)(unsafe.Pointer(&b[0]))
hx := hashInt(x, alg)
hy := hashInt(y, alg^1)
return uint64(n) ^ hx ^ hy
case n >= 4:
// 4 < length <= 8
// we can read the bytes as two overlapping 32-bit ints, apply different
// hash functions to each in parallel
// then xor the results
x := *(*uint32)(unsafe.Pointer(&b[n-4]))
y := *(*uint32)(unsafe.Pointer(&b[0]))
hx := hashInt(uint64(x), alg)
hy := hashInt(uint64(y), alg^1)
return uint64(n) ^ hx ^ hy
case n > 0:
x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1]))
return hashInt(uint64(x), alg)
case n == 0:
return 1
}
}

// increase differentiation enough to improve hash quality
return xxh3.Hash(b) + exprimes[alg]
}

const (
sentinel uint64 = 0
loadFactor int64 = 2
Expand Down

0 comments on commit 853750a

Please sign in to comment.