Skip to content

Commit 9e16cc1

Browse files
committed
hash/maphash: add Bytes and String
For very small inputs, h.Reset+h.Write+h.Sum64 is fundamentally slower than a single operation, by about a factor of two, because Write must copy the data into h's buffer, just in case there is another Write before the Sum64. A single function doing the whole sequence knows there is no extra write that will happen, so it doesn't need the buffer, so it avoids the copy. Fixes #42710. Change-Id: Icc79c68ccb10827f6640071d026df86b4940fcc1 Reviewed-on: https://go-review.googlesource.com/c/go/+/392494 Reviewed-by: Ian Lance Taylor <[email protected]> Trust: Russ Cox <[email protected]> Run-TryBot: Russ Cox <[email protected]> TryBot-Result: Gopher Robot <[email protected]>
1 parent 9839668 commit 9e16cc1

File tree

3 files changed

+90
-20
lines changed

3 files changed

+90
-20
lines changed

api/next/42710.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pkg hash/maphash, func Bytes(Seed, []uint8) uint64 #42710
2+
pkg hash/maphash, func String(Seed, string) uint64 #42710

src/hash/maphash/maphash.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,54 @@ type Seed struct {
3333
s uint64
3434
}
3535

36+
// Bytes returns the hash of b with the given seed.
37+
//
38+
// Bytes is equivalent to, but more convenient and efficient than:
39+
//
40+
// var h Hash
41+
// h.SetSeed(seed)
42+
// h.Write(b)
43+
// return h.Sum64()
44+
func Bytes(seed Seed, b []byte) uint64 {
45+
state := seed.s
46+
if state == 0 {
47+
panic("maphash: use of uninitialized Seed")
48+
}
49+
if len(b) == 0 {
50+
return rthash(nil, 0, state) // avoid &b[0] index panic below
51+
}
52+
if len(b) > bufSize {
53+
b = b[:len(b):len(b)] // merge len and cap calculations when reslicing
54+
for len(b) > bufSize {
55+
state = rthash(&b[0], bufSize, state)
56+
b = b[bufSize:]
57+
}
58+
}
59+
return rthash(&b[0], len(b), state)
60+
}
61+
62+
// String returns the hash of s with the given seed.
63+
//
64+
// String is equivalent to, but more convenient and efficient than:
65+
//
66+
// var h Hash
67+
// h.SetSeed(seed)
68+
// h.WriteString(s)
69+
// return h.Sum64()
70+
func String(seed Seed, s string) uint64 {
71+
state := seed.s
72+
if state == 0 {
73+
panic("maphash: use of uninitialized Seed")
74+
}
75+
for len(s) > bufSize {
76+
p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
77+
state = rthash(p, bufSize, state)
78+
s = s[bufSize:]
79+
}
80+
p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
81+
return rthash(p, len(s), state)
82+
}
83+
3684
// A Hash computes a seeded hash of a byte sequence.
3785
//
3886
// The zero Hash is a valid Hash ready to use.

src/hash/maphash/maphash_test.go

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package maphash
66

77
import (
88
"bytes"
9+
"fmt"
910
"hash"
1011
"testing"
1112
)
@@ -87,6 +88,14 @@ func TestHashGrouping(t *testing.T) {
8788
t.Errorf("hash %d not identical to a single Write", i)
8889
}
8990
}
91+
92+
if sum1 := Bytes(hh[0].Seed(), b); sum1 != hh[0].Sum64() {
93+
t.Errorf("hash using Bytes not identical to a single Write")
94+
}
95+
96+
if sum1 := String(hh[0].Seed(), string(b)); sum1 != hh[0].Sum64() {
97+
t.Errorf("hash using String not identical to a single Write")
98+
}
9099
}
91100

92101
func TestHashBytesVsString(t *testing.T) {
@@ -208,28 +217,39 @@ var _ hash.Hash64 = &Hash{}
208217
func benchmarkSize(b *testing.B, size int) {
209218
h := &Hash{}
210219
buf := make([]byte, size)
211-
b.SetBytes(int64(size))
212-
b.ResetTimer()
213-
214-
for i := 0; i < b.N; i++ {
215-
h.Reset()
216-
h.Write(buf)
217-
h.Sum64()
218-
}
219-
}
220-
221-
func BenchmarkHash8Bytes(b *testing.B) {
222-
benchmarkSize(b, 8)
223-
}
220+
s := string(buf)
221+
222+
b.Run("Write", func(b *testing.B) {
223+
b.SetBytes(int64(size))
224+
for i := 0; i < b.N; i++ {
225+
h.Reset()
226+
h.Write(buf)
227+
h.Sum64()
228+
}
229+
})
224230

225-
func BenchmarkHash320Bytes(b *testing.B) {
226-
benchmarkSize(b, 320)
227-
}
231+
b.Run("Bytes", func(b *testing.B) {
232+
b.SetBytes(int64(size))
233+
seed := h.Seed()
234+
for i := 0; i < b.N; i++ {
235+
Bytes(seed, buf)
236+
}
237+
})
228238

229-
func BenchmarkHash1K(b *testing.B) {
230-
benchmarkSize(b, 1024)
239+
b.Run("String", func(b *testing.B) {
240+
b.SetBytes(int64(size))
241+
seed := h.Seed()
242+
for i := 0; i < b.N; i++ {
243+
String(seed, s)
244+
}
245+
})
231246
}
232247

233-
func BenchmarkHash8K(b *testing.B) {
234-
benchmarkSize(b, 8192)
248+
func BenchmarkHash(b *testing.B) {
249+
sizes := []int{4, 8, 16, 32, 64, 256, 320, 1024, 4096, 16384}
250+
for _, size := range sizes {
251+
b.Run(fmt.Sprint("n=", size), func(b *testing.B) {
252+
benchmarkSize(b, size)
253+
})
254+
}
235255
}

0 commit comments

Comments
 (0)