From 199c5e7ca420c6e81e01aff243e8b456d4a54860 Mon Sep 17 00:00:00 2001 From: cornelk Date: Fri, 2 Sep 2022 18:28:48 -0600 Subject: [PATCH] improve hashing and avoid dedicated MapString type --- Makefile | 8 - README.md | 24 +-- benchmarks/benchmark_test.go | 4 +- benchmarks/go.mod | 2 +- benchmarks/go.sum | 2 + defines.go | 6 +- example_test.go | 4 +- hashmap.go | 6 +- hashmap_string.go | 348 ----------------------------------- hashmap_test.go | 22 ++- util_hash.go | 295 ++++++++++++++++++----------- util_hash_string.go | 130 ------------- util_hash_test.go | 2 +- util_test.go | 2 +- 14 files changed, 231 insertions(+), 624 deletions(-) delete mode 100644 hashmap_string.go delete mode 100644 util_hash_string.go diff --git a/Makefile b/Makefile index f43b5c8..9bab5c4 100644 --- a/Makefile +++ b/Makefile @@ -23,11 +23,3 @@ test-coverage-web: test-coverage ## run unit tests and show test coverage in bro install-linters: ## install all used linters curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $$(go env GOPATH)/bin v1.49.0 - -alternatives: ## generate alternative non numeric hashmap versions - cp hashmap.go hashmap_string.go - sed -i 's,Map,MapString,' hashmap_string.go - sed -i 's,New\[,NewString\[,' hashmap_string.go - sed -i 's,// New,// NewString,' hashmap_string.go - sed -i 's,NewSized,NewStringSized,' hashmap_string.go - sed -i 's,numeric,string,' hashmap_string.go diff --git a/README.md b/README.md index d6377a9..955eb58 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,6 @@ The minimal supported Golang version is 1.19 as it makes use of Generics and the ## Usage -For `New()` only Go numeric types are supported. For string keyed maps `NewString` has to be used. - Example uint8 key map uses: ``` @@ -28,14 +26,14 @@ value, ok := m.Get(1) Example string key map uses: ``` -m := NewString[string, int]() +m := New[string, int]() m.Set("amount", 123) value, ok := m.Get("amount") ``` Using the map to count URL requests: ``` -m := NewString[string, *int64]() +m := New[string, *int64]() var i int64 counter, _ := m.GetOrInsert("api/123", &i) atomic.AddInt64(counter, 1) // increase counter @@ -49,18 +47,18 @@ Reading from the hash map for numeric key types in a thread-safe way is faster t in an unsafe way and four times faster than Golang's `sync.Map`: ``` -BenchmarkReadHashMapUint-8 1788601 668.4 ns/op -BenchmarkReadHaxMapUint-8 1691654 709.6 ns/op -BenchmarkReadGoMapUintUnsafe-8 1516452 784.4 ns/op -BenchmarkReadGoMapUintMutex-8 39429 27978 ns/op -BenchmarkReadGoSyncMapUint-8 446930 2544 ns/op +BenchmarkReadHashMapUint-8 1774460 677.3 ns/op +BenchmarkReadHaxMapUint-8 1758708 679.0 ns/op +BenchmarkReadGoMapUintUnsafe-8 1497732 790.9 ns/op +BenchmarkReadGoMapUintMutex-8 41562 28672 ns/op +BenchmarkReadGoSyncMapUint-8 454401 2646 ns/op ``` Reading from the map while writes are happening: ``` -BenchmarkReadHashMapWithWritesUint-8 1418299 856.4 ns/op -BenchmarkReadHaxMapWithWritesUint-8 1262414 948.5 ns/op -BenchmarkReadGoSyncMapWithWritesUint-8 382785 3240 ns/op +BenchmarkReadHashMapWithWritesUint-8 1388560 859.1 ns/op +BenchmarkReadHaxMapWithWritesUint-8 1306671 914.5 ns/op +BenchmarkReadGoSyncMapWithWritesUint-8 335732 3113 ns/op ``` Write performance without any concurrent reads: @@ -88,5 +86,3 @@ The benchmarks were run with Golang 1.19.0 on Linux and AMD64 using `make benchm When the slice reaches a defined fill rate, a bigger slice is allocated and all keys are recalculated and transferred into the new slice. * For hashing, specialized xxhash implementations are used that match the size of the key type where available - -* A specialized String version of the map exists due to a limitation of type switches of parametric types - see https://github.com/golang/go/issues/45380 for more info. diff --git a/benchmarks/benchmark_test.go b/benchmarks/benchmark_test.go index c4e2eee..e4cb132 100644 --- a/benchmarks/benchmark_test.go +++ b/benchmarks/benchmark_test.go @@ -32,10 +32,10 @@ func setupHaxMap(b *testing.B) *haxmap.HashMap[uintptr, uintptr] { return m } -func setupHashMapString(b *testing.B) (*hashmap.MapString[string, string], []string) { +func setupHashMapString(b *testing.B) (*hashmap.Map[string, string], []string) { b.Helper() - m := hashmap.NewString[string, string]() + m := hashmap.New[string, string]() keys := make([]string, benchmarkItemCount) for i := 0; i < benchmarkItemCount; i++ { s := strconv.Itoa(i) diff --git a/benchmarks/go.mod b/benchmarks/go.mod index ef95084..ec816a4 100644 --- a/benchmarks/go.mod +++ b/benchmarks/go.mod @@ -5,6 +5,6 @@ go 1.19 replace github.com/cornelk/hashmap => ../ require ( - github.com/alphadose/haxmap v0.3.1-0.20220831135524-f7fd3700af2e + github.com/alphadose/haxmap v0.3.1 github.com/cornelk/hashmap v1.0.7-0.20220831150614-2c244e4098a0 ) diff --git a/benchmarks/go.sum b/benchmarks/go.sum index 144444d..e87fcf5 100644 --- a/benchmarks/go.sum +++ b/benchmarks/go.sum @@ -2,3 +2,5 @@ github.com/alphadose/haxmap v0.3.1-0.20220831034947-0d601bb44159 h1:Vy5DvT2YgH2N github.com/alphadose/haxmap v0.3.1-0.20220831034947-0d601bb44159/go.mod h1:Fu37Wlmj7cR++vSLgRTu3fGy8wpjHGmMypM2aclkc1A= github.com/alphadose/haxmap v0.3.1-0.20220831135524-f7fd3700af2e h1:wNcWJlc0StruM5i6yPyrDYQDO3pxrxm1b/U2boezeVI= github.com/alphadose/haxmap v0.3.1-0.20220831135524-f7fd3700af2e/go.mod h1:Fu37Wlmj7cR++vSLgRTu3fGy8wpjHGmMypM2aclkc1A= +github.com/alphadose/haxmap v0.3.1 h1:P02INS0xIwY0R3E+KRJTPtrlwTvRPYoU2GgoeRk2KbU= +github.com/alphadose/haxmap v0.3.1/go.mod h1:Fu37Wlmj7cR++vSLgRTu3fGy8wpjHGmMypM2aclkc1A= diff --git a/defines.go b/defines.go index e804510..75f0e9e 100644 --- a/defines.go +++ b/defines.go @@ -6,7 +6,7 @@ const defaultSize = 8 // maxFillRate is the maximum fill rate for the slice before a resize will happen. const maxFillRate = 50 -// support all numeric types and types whose underlying type is also numeric. -type numeric interface { - ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | ~float32 | ~float64 +// support all numeric and string types and aliases of those. +type hashable interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | ~float32 | ~float64 | ~string } diff --git a/example_test.go b/example_test.go index 0041d1c..b4e7325 100644 --- a/example_test.go +++ b/example_test.go @@ -11,7 +11,7 @@ import ( // TestAPICounter shows how to use the hashmap to count REST server API calls. func TestAPICounter(t *testing.T) { t.Parallel() - m := NewString[string, *int64]() + m := New[string, *int64]() for i := 0; i < 100; i++ { s := fmt.Sprintf("/api%d/", i%4) @@ -28,7 +28,7 @@ func TestAPICounter(t *testing.T) { } func TestExample(t *testing.T) { - m := NewString[string, int]() + m := New[string, int]() m.Set("amount", 123) value, ok := m.Get("amount") assert.True(t, ok) diff --git a/hashmap.go b/hashmap.go index b968bc4..dbceb52 100644 --- a/hashmap.go +++ b/hashmap.go @@ -11,7 +11,7 @@ import ( ) // Map implements a read optimized hash map. -type Map[Key numeric, Value any] struct { +type Map[Key hashable, Value any] struct { hasher func(Key) uintptr store atomic.Pointer[store[Key, Value]] // pointer to a map instance that gets replaced if the map resizes linkedList *List[Key, Value] // key sorted linked list of elements @@ -21,12 +21,12 @@ type Map[Key numeric, Value any] struct { } // New returns a new map instance. -func New[Key numeric, Value any]() *Map[Key, Value] { +func New[Key hashable, Value any]() *Map[Key, Value] { return NewSized[Key, Value](defaultSize) } // NewSized returns a new map instance with a specific initialization size. -func NewSized[Key numeric, Value any](size uintptr) *Map[Key, Value] { +func NewSized[Key hashable, Value any](size uintptr) *Map[Key, Value] { m := &Map[Key, Value]{} m.allocate(size) m.setDefaultHasher() diff --git a/hashmap_string.go b/hashmap_string.go deleted file mode 100644 index df86991..0000000 --- a/hashmap_string.go +++ /dev/null @@ -1,348 +0,0 @@ -// Package hashmap provides a lock-free and thread-safe HashMapString. -package hashmap - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "sync/atomic" - "unsafe" -) - -// MapString implements a read optimized hash map. -type MapString[Key string, Value any] struct { - hasher func(Key) uintptr - store atomic.Pointer[store[Key, Value]] // pointer to a map instance that gets replaced if the map resizes - linkedList *List[Key, Value] // key sorted linked list of elements - // resizing marks a resizing operation in progress. - // this is using uintptr instead of atomic.Bool to avoid using 32 bit int on 64 bit systems - resizing atomic.Uintptr -} - -// NewString returns a new map instance. -func NewString[Key string, Value any]() *MapString[Key, Value] { - return NewStringSized[Key, Value](defaultSize) -} - -// NewStringSized returns a new map instance with a specific initialization size. -func NewStringSized[Key string, Value any](size uintptr) *MapString[Key, Value] { - m := &MapString[Key, Value]{} - m.allocate(size) - m.setDefaultHasher() - return m -} - -// SetHasher sets a custom hasher. -func (m *MapString[Key, Value]) SetHasher(hasher func(Key) uintptr) { - m.hasher = hasher -} - -// Len returns the number of elements within the map. -func (m *MapString[Key, Value]) Len() int { - return m.linkedList.Len() -} - -// Get retrieves an element from the map under given hash key. -func (m *MapString[Key, Value]) Get(key Key) (Value, bool) { - hash := m.hasher(key) - - for element := m.store.Load().item(hash); element != nil; element = element.Next() { - if element.keyHash == hash && element.key == key { - return element.Value(), true - } - - if element.keyHash > hash { - return *new(Value), false - } - } - return *new(Value), false -} - -// GetOrInsert returns the existing value for the key if present. -// Otherwise, it stores and returns the given value. -// The returned bool is true if the value was loaded, false if stored. -func (m *MapString[Key, Value]) GetOrInsert(key Key, value Value) (Value, bool) { - hash := m.hasher(key) - var newElement *ListElement[Key, Value] - - for { - for element := m.store.Load().item(hash); element != nil; element = element.Next() { - if element.keyHash == hash && element.key == key { - actual := element.Value() - return actual, true - } - - if element.keyHash > hash { - break - } - } - - if newElement == nil { // allocate only once - newElement = &ListElement[Key, Value]{ - key: key, - keyHash: hash, - } - newElement.value.Store(&value) - } - - if m.insertElement(newElement, hash, key, value) { - return value, false - } - } -} - -// FillRate returns the fill rate of the map as a percentage integer. -func (m *MapString[Key, Value]) FillRate() int { - store := m.store.Load() - count := int(store.count.Load()) - l := len(store.index) - return (count * 100) / l -} - -// Del deletes the key from the map and returns whether the key was deleted. -func (m *MapString[Key, Value]) Del(key Key) bool { - hash := m.hasher(key) - store := m.store.Load() - element := store.item(hash) - - for ; element != nil; element = element.Next() { - if element.keyHash == hash && element.key == key { - m.deleteElement(element) - m.linkedList.Delete(element) - return true - } - - if element.keyHash > hash { - return false - } - } - return false -} - -// Insert sets the value under the specified key to the map if it does not exist yet. -// If a resizing operation is happening concurrently while calling Insert, the item might show up in the map -// after the resize operation is finished. -// Returns true if the item was inserted or false if it existed. -func (m *MapString[Key, Value]) Insert(key Key, value Value) bool { - hash := m.hasher(key) - var ( - existed, inserted bool - element *ListElement[Key, Value] - ) - - for { - store := m.store.Load() - searchStart := store.item(hash) - - if !inserted { // if retrying after insert during grow, do not add to list again - element, existed, inserted = m.linkedList.Add(searchStart, hash, key, value) - if existed { - return false - } - if !inserted { - continue // a concurrent add did interfere, try again - } - } - - count := store.addItem(element) - currentStore := m.store.Load() - if store != currentStore { // retry insert in case of insert during grow - continue - } - - if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { - go m.grow(0, true) - } - return true - } -} - -// Set sets the value under the specified key to the map. An existing item for this key will be overwritten. -// If a resizing operation is happening concurrently while calling Set, the item might show up in the map -// after the resize operation is finished. -func (m *MapString[Key, Value]) Set(key Key, value Value) { - hash := m.hasher(key) - - for { - store := m.store.Load() - searchStart := store.item(hash) - - element, added := m.linkedList.AddOrUpdate(searchStart, hash, key, value) - if !added { - continue // a concurrent add did interfere, try again - } - - count := store.addItem(element) - currentStore := m.store.Load() - if store != currentStore { // retry insert in case of insert during grow - continue - } - - if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { - go m.grow(0, true) - } - return - } -} - -// Grow resizes the map to a new size, the size gets rounded up to next power of 2. -// To double the size of the map use newSize 0. -// This function returns immediately, the resize operation is done in a goroutine. -// No resizing is done in case of another resize operation already being in progress. -func (m *MapString[Key, Value]) Grow(newSize uintptr) { - if m.resizing.CompareAndSwap(0, 1) { - go m.grow(newSize, true) - } -} - -// String returns the map as a string, only hashed keys are printed. -func (m *MapString[Key, Value]) String() string { - buffer := bytes.NewBufferString("") - buffer.WriteRune('[') - - first := m.linkedList.First() - item := first - - for item != nil { - if item != first { - buffer.WriteRune(',') - } - fmt.Fprint(buffer, item.keyHash) - item = item.Next() - } - buffer.WriteRune(']') - return buffer.String() -} - -// Range calls f sequentially for each key and value present in the map. -// If f returns false, range stops the iteration. -func (m *MapString[Key, Value]) Range(f func(Key, Value) bool) { - item := m.linkedList.First() - - for item != nil { - value := item.Value() - if !f(item.key, value) { - return - } - item = item.Next() - } -} - -func (m *MapString[Key, Value]) allocate(newSize uintptr) { - m.linkedList = NewList[Key, Value]() - if m.resizing.CompareAndSwap(0, 1) { - m.grow(newSize, false) - } -} - -func (m *MapString[Key, Value]) isResizeNeeded(store *store[Key, Value], count uintptr) bool { - l := uintptr(len(store.index)) // l can't be 0 as it gets initialized in New() - fillRate := (count * 100) / l - return fillRate > maxFillRate -} - -func (m *MapString[Key, Value]) insertElement(element *ListElement[Key, Value], hash uintptr, key Key, value Value) bool { - var existed, inserted bool - - for { - store := m.store.Load() - searchStart := store.item(element.keyHash) - - if !inserted { // if retrying after insert during grow, do not add to list again - _, existed, inserted = m.linkedList.Add(searchStart, hash, key, value) - if existed { - return false - } - - if !inserted { - continue // a concurrent add did interfere, try again - } - } - - count := store.addItem(element) - currentStore := m.store.Load() - if store != currentStore { // retry insert in case of insert during grow - continue - } - - if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { - go m.grow(0, true) - } - return true - } -} - -// deleteElement deletes an element from index. -func (m *MapString[Key, Value]) deleteElement(element *ListElement[Key, Value]) { - for { - store := m.store.Load() - index := element.keyHash >> store.keyShifts - ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(store.array) + index*intSizeBytes)) - - next := element.Next() - if next != nil && element.keyHash>>store.keyShifts != index { - next = nil // do not set index to next item if it's not the same slice index - } - atomic.CompareAndSwapPointer(ptr, unsafe.Pointer(element), unsafe.Pointer(next)) - - currentStore := m.store.Load() - if store == currentStore { // check that no resize happened - break - } - } -} - -func (m *MapString[Key, Value]) grow(newSize uintptr, loop bool) { - defer m.resizing.CompareAndSwap(1, 0) - - for { - currentStore := m.store.Load() - if newSize == 0 { - newSize = uintptr(len(currentStore.index)) << 1 - } else { - newSize = roundUpPower2(newSize) - } - - index := make([]*ListElement[Key, Value], newSize) - header := (*reflect.SliceHeader)(unsafe.Pointer(&index)) - - newStore := &store[Key, Value]{ - keyShifts: strconv.IntSize - log2(newSize), - array: unsafe.Pointer(header.Data), // use address of slice data storage - index: index, - } - - m.fillIndexItems(newStore) // initialize new index slice with longer keys - - m.store.Store(newStore) - - m.fillIndexItems(newStore) // make sure that the new index is up-to-date with the current state of the linked list - - if !loop { - return - } - - // check if a new resize needs to be done already - count := uintptr(m.Len()) - if !m.isResizeNeeded(newStore, count) { - return - } - newSize = 0 // 0 means double the current size - } -} - -func (m *MapString[Key, Value]) fillIndexItems(store *store[Key, Value]) { - first := m.linkedList.First() - item := first - lastIndex := uintptr(0) - - for item != nil { - index := item.keyHash >> store.keyShifts - if item == first || index != lastIndex { // store item with smallest hash key for every index - store.addItem(item) - lastIndex = index - } - item = item.Next() - } -} diff --git a/hashmap_test.go b/hashmap_test.go index 247c3af..f35f6ea 100644 --- a/hashmap_test.go +++ b/hashmap_test.go @@ -60,9 +60,9 @@ func TestSetUint8(t *testing.T) { assert.Equal(t, 200, value) } -func TestSetUint16(t *testing.T) { +func TestSetInt16(t *testing.T) { t.Parallel() - m := New[uint16, int]() + m := New[int16, int]() m.Set(1, 128) // insert value, ok := m.Get(1) @@ -92,6 +92,22 @@ func TestSetFloat32(t *testing.T) { assert.Equal(t, 200, value) } +func TestSetFloat64(t *testing.T) { + t.Parallel() + m := New[float64, int]() + + m.Set(1.1, 128) // insert + value, ok := m.Get(1.1) + assert.True(t, ok) + assert.Equal(t, 128, value) + + m.Set(2.2, 200) // insert + assert.Equal(t, 2, m.Len()) + value, ok = m.Get(2.2) + assert.True(t, ok) + assert.Equal(t, 200, value) +} + func TestSetInt64(t *testing.T) { t.Parallel() m := New[int64, int]() @@ -363,7 +379,7 @@ func TestHashMap_parallel(t *testing.T) { func TestHashMap_SetConcurrent(t *testing.T) { t.Parallel() - m := NewString[string, int]() + m := New[string, int]() var wg sync.WaitGroup for i := 0; i < 100; i++ { diff --git a/util_hash.go b/util_hash.go index 8ce8c81..5cd233e 100644 --- a/util_hash.go +++ b/util_hash.go @@ -1,9 +1,11 @@ package hashmap import ( + "encoding/binary" "fmt" "math/bits" "reflect" + "unsafe" ) const ( @@ -41,14 +43,9 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -// Specialized xxhash hash functions, optimized for the bit size of the key where available, -// for all supported types beside string. - // setDefaultHasher sets the default hasher depending on the key type. // Inlines hashing as anonymous functions for performance improvements, other options like // returning an anonymous functions from another function turned out to not be as performant. -// -//nolint:funlen, maintidx func (m *Map[Key, Value]) setDefaultHasher() { var key Key kind := reflect.ValueOf(&key).Elem().Type().Kind() @@ -57,123 +54,205 @@ func (m *Map[Key, Value]) setDefaultHasher() { case reflect.Int, reflect.Uint, reflect.Uintptr: switch intSizeBytes { case 2: - m.hasher = func(key Key) uintptr { - h := prime5 + 2 - h ^= (uint64(key) & 0xff) * prime5 - h = bits.RotateLeft64(h, 11) * prime1 - h ^= ((uint64(key) >> 8) & 0xff) * prime5 - h = bits.RotateLeft64(h, 11) * prime1 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } - + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord)) case 4: - m.hasher = func(key Key) uintptr { - h := prime5 + 4 - h ^= uint64(key) * prime1 - h = bits.RotateLeft64(h, 23)*prime2 + prime3 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } - + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword)) case 8: - m.hasher = func(key Key) uintptr { - k1 := uint64(key) * prime2 - k1 = bits.RotateLeft64(k1, 31) - k1 *= prime1 - h := (prime5 + 8) ^ k1 - h = bits.RotateLeft64(h, 27)*prime1 + prime4 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword)) default: panic(fmt.Errorf("unsupported integer byte size %d", intSizeBytes)) } case reflect.Int8, reflect.Uint8: - m.hasher = func(key Key) uintptr { - h := prime5 + 1 - h ^= uint64(key) * prime5 - h = bits.RotateLeft64(h, 11) * prime1 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } - + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashByte)) case reflect.Int16, reflect.Uint16: - m.hasher = func(key Key) uintptr { - h := prime5 + 2 - h ^= (uint64(key) & 0xff) * prime5 - h = bits.RotateLeft64(h, 11) * prime1 - h ^= ((uint64(key) >> 8) & 0xff) * prime5 - h = bits.RotateLeft64(h, 11) * prime1 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord)) + case reflect.Int32, reflect.Uint32: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword)) + case reflect.Int64, reflect.Uint64: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword)) + case reflect.Float32: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat32)) + case reflect.Float64: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat64)) + case reflect.String: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashString)) + + default: + panic(fmt.Errorf("unsupported key type %T of kind %v", key, kind)) + } +} - case reflect.Int32, reflect.Uint32, reflect.Float32: - m.hasher = func(key Key) uintptr { - h := prime5 + 4 - h ^= uint64(key) * prime1 - h = bits.RotateLeft64(h, 23)*prime2 + prime3 +// Specialized xxhash hash functions, optimized for the bit size of the key where available, +// for all supported types beside string. - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 +var xxHashByte = func(key uint8) uintptr { + h := prime5 + 1 + h ^= uint64(key) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 - return uintptr(h) - } + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashWord = func(key uint16) uintptr { + h := prime5 + 2 + h ^= (uint64(key) & 0xff) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 + h ^= ((uint64(key) >> 8) & 0xff) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashDword = func(key uint32) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} - case reflect.Int64, reflect.Uint64, reflect.Float64: - m.hasher = func(key Key) uintptr { - k1 := uint64(key) * prime2 - k1 = bits.RotateLeft64(k1, 31) - k1 *= prime1 - h := (prime5 + 8) ^ k1 - h = bits.RotateLeft64(h, 27)*prime1 + prime4 - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) +var xxHashFloat32 = func(key float32) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashFloat64 = func(key float64) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashQword = func(key uint64) uintptr { + k1 := key * prime2 + k1 = bits.RotateLeft64(k1, 31) + k1 *= prime1 + h := (prime5 + 8) ^ k1 + h = bits.RotateLeft64(h, 27)*prime1 + prime4 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashString = func(key string) uintptr { + sh := (*reflect.StringHeader)(unsafe.Pointer(&key)) + bh := reflect.SliceHeader{ + Data: sh.Data, + Len: sh.Len, + Cap: sh.Len, // cap needs to be set, otherwise xxhash fails on ARM Macs + } + + b := *(*[]byte)(unsafe.Pointer(&bh)) + var h uint64 + + if sh.Len >= 32 { + v1 := prime1v + prime2 + v2 := prime2 + v3 := uint64(0) + v4 := -prime1v + for len(b) >= 32 { + v1 = round(v1, binary.LittleEndian.Uint64(b[0:8:len(b)])) + v2 = round(v2, binary.LittleEndian.Uint64(b[8:16:len(b)])) + v3 = round(v3, binary.LittleEndian.Uint64(b[16:24:len(b)])) + v4 = round(v4, binary.LittleEndian.Uint64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] } + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = prime5 + } - default: - panic(fmt.Errorf("unsupported key type %T of kind %v", key, kind)) + h += uint64(sh.Len) + + i, end := 0, len(b) + for ; i+8 <= end; i += 8 { + k1 := round(0, binary.LittleEndian.Uint64(b[i:i+8:len(b)])) + h ^= k1 + h = rol27(h)*prime1 + prime4 } + if i+4 <= end { + h ^= uint64(binary.LittleEndian.Uint32(b[i:i+4:len(b)])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for ; i < end; i++ { + h ^= uint64(b[i]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +func round(acc, input uint64) uint64 { + acc += input * prime2 + acc = rol31(acc) + acc *= prime1 + return acc } + +func mergeRound(acc, val uint64) uint64 { + val = round(0, val) + acc ^= val + acc = acc*prime1 + prime4 + return acc +} + +func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } +func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } +func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } +func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } +func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } +func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } +func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } +func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/util_hash_string.go b/util_hash_string.go deleted file mode 100644 index eb935c3..0000000 --- a/util_hash_string.go +++ /dev/null @@ -1,130 +0,0 @@ -package hashmap - -import ( - "encoding/binary" - "fmt" - "math/bits" - "reflect" - "unsafe" -) - -/* -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -// setDefaultHasher sets the default hasher for the string map. -// -//nolint:funlen -func (m *MapString[Key, Value]) setDefaultHasher() { - var key Key - kind := reflect.ValueOf(&key).Elem().Type().Kind() - - switch kind { - case reflect.String: - m.hasher = func(key Key) uintptr { - sh := (*reflect.StringHeader)(unsafe.Pointer(&key)) - bh := reflect.SliceHeader{ - Data: sh.Data, - Len: sh.Len, - Cap: sh.Len, // cap needs to be set, otherwise xxhash fails on ARM Macs - } - - b := *(*[]byte)(unsafe.Pointer(&bh)) - var h uint64 - - if sh.Len >= 32 { - v1 := prime1v + prime2 - v2 := prime2 - v3 := uint64(0) - v4 := -prime1v - for len(b) >= 32 { - v1 = round(v1, binary.LittleEndian.Uint64(b[0:8:len(b)])) - v2 = round(v2, binary.LittleEndian.Uint64(b[8:16:len(b)])) - v3 = round(v3, binary.LittleEndian.Uint64(b[16:24:len(b)])) - v4 = round(v4, binary.LittleEndian.Uint64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = prime5 - } - - h += uint64(sh.Len) - - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, binary.LittleEndian.Uint64(b[i:i+8:len(b)])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if i+4 <= end { - h ^= uint64(binary.LittleEndian.Uint32(b[i:i+4:len(b)])) * prime1 - h = rol23(h)*prime2 + prime3 - i += 4 - } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 - h = rol11(h) * prime1 - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return uintptr(h) - } - - default: - panic(fmt.Errorf("unsupported key type %T", key)) - } -} - -func round(acc, input uint64) uint64 { - acc += input * prime2 - acc = rol31(acc) - acc *= prime1 - return acc -} - -func mergeRound(acc, val uint64) uint64 { - val = round(0, val) - acc ^= val - acc = acc*prime1 + prime4 - return acc -} - -func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } -func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } -func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } -func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } -func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } -func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } -func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } -func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/util_hash_test.go b/util_hash_test.go index 3cd525d..d6a8402 100644 --- a/util_hash_test.go +++ b/util_hash_test.go @@ -39,7 +39,7 @@ func TestHashingUint8(t *testing.T) { } func TestHashingString(t *testing.T) { - m := NewString[string, uint8]() + m := New[string, uint8]() assert.Equal(t, uintptr(0x6a1faf26e7da4cb9), m.hasher("properunittesting")) assert.Equal(t, uintptr(0x2d4ff7e12135f1f3), m.hasher("longstringlongstringlongstringlongstring")) } diff --git a/util_test.go b/util_test.go index 43fac79..2fd196d 100644 --- a/util_test.go +++ b/util_test.go @@ -23,7 +23,7 @@ func TestLog2(t *testing.T) { } func TestHashCollision(t *testing.T) { - m := NewString[string, int]() + m := New[string, int]() staticHasher := func(key string) uintptr { return 4 // chosen by fair dice roll. guaranteed to be random.