Skip to content

Commit 9e96984

Browse files
committed
Build a string factory object for safely converting bytes to strings with limited allocations
1 parent b388d68 commit 9e96984

File tree

2 files changed

+219
-1
lines changed

2 files changed

+219
-1
lines changed

src/strings/builder.go

+93-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package strings
66

77
import (
8+
"sync/atomic"
89
"unicode/utf8"
910
"unsafe"
1011
)
@@ -44,7 +45,11 @@ func (b *Builder) copyCheck() {
4445

4546
// String returns the accumulated string.
4647
func (b *Builder) String() string {
47-
return *(*string)(unsafe.Pointer(&b.buf))
48+
return bytes2String(b.buf)
49+
}
50+
51+
// bytes2String reinterprets the slice header of bytes as a string header and
// returns the resulting string without copying any data. The returned string
// shares its backing memory with bytes, so any later write to bytes would be
// visible through the string.
func bytes2String(bytes []byte) string {
52+
return *(*string)(unsafe.Pointer(&bytes))
4853
}
4954

5055
// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
@@ -122,3 +127,90 @@ func (b *Builder) WriteString(s string) (int, error) {
122127
b.buf = append(b.buf, s...)
123128
return len(s), nil
124129
}
130+
131+
const (
132+
// DefaultFactoryPoolSize is the pool size, in bytes, that NewFactory passes
// to NewFactoryWithPoolSize. It is also the size of each syncTape used by
// the package-level New.
133+
DefaultFactoryPoolSize = 4096
134+
)
135+
136+
// Factory generates immutable strings from byte slices by copying the bytes
// into a pooled Builder buffer and returning sub-strings of that buffer.
// Its methods use the Builder without any synchronization.
136+
type Factory struct {
138+
// b owns the current pool buffer that returned strings point into.
b Builder
139+
}
140+
141+
// NewFactory returns a Factory whose pool size is DefaultFactoryPoolSize.
142+
func NewFactory() *Factory {
143+
return NewFactoryWithPoolSize(DefaultFactoryPoolSize)
144+
}
145+
146+
// NewFactoryWithPoolSize returns a Factory after calling Grow(size) on its
// internal Builder, so size is the initial pool size in bytes.
147+
// The pool exists only to reduce memory fragmentation.
// NOTE(review): Builder.Grow presumably rejects a negative size — confirm.
148+
func NewFactoryWithPoolSize(size int) *Factory {
149+
f := &Factory{}
150+
f.b.Grow(size)
151+
return f
152+
}
153+
154+
// New returns a string holding a copy of content.
//
// Content longer than half of the Builder's capacity is converted with a
// plain string(content). Anything else is appended to the Builder and the
// returned string is the sub-string of the Builder's contents that covers the
// newly written bytes. When the remaining capacity is too small, the Builder
// is reset and grown to its previous capacity before the write.
155+
func (f *Factory) New(content []byte) string {
156+
157+
bCap := f.b.Cap()
158+
bLen := f.b.Len()
159+
160+
// Large content is not pooled: it would use up at least half of the pool.
if len(content)*2 > bCap {
161+
return string(content)
162+
}
163+
164+
// Not enough room left in the current buffer: start over with a buffer of
// the same capacity.
// NOTE(review): this relies on Builder.Reset dropping (not reusing) the old
// buffer so previously returned strings stay intact — confirm.
if len(content) > bCap-bLen {
165+
f.b.Reset()
166+
f.b.Grow(bCap)
167+
}
168+
169+
// Remember where this content starts so only its bytes are returned.
preLen := f.b.Len()
170+
f.b.Write(content)
171+
return f.b.String()[preLen:]
172+
}
173+
174+
// syncTape is a fixed-size byte array of DefaultFactoryPoolSize bytes from
// which alloc hands out consecutive fragments. It is for internal use only;
// see how globalFactory is used by New.
175+
type syncTape struct {
176+
// tape is the backing storage that fragments are sliced from.
tape [DefaultFactoryPoolSize]byte
177+
// tPtr is the offset of the first unallocated byte in tape; alloc accesses
// it only through sync/atomic.
// NOTE(review): 64-bit atomics need 8-byte alignment on 32-bit platforms;
// tPtr sits at offset DefaultFactoryPoolSize, so this presumably relies on
// that constant being a multiple of 8 — confirm.
tPtr int64
178+
}
179+
180+
// alloc reserves size bytes of the tape by atomically advancing tPtr.
// On success it returns the reserved fragment and true. If the advance runs
// past the end of the tape it returns (nil, false); the bytes between the
// previous cursor and the end of the tape are not handed out afterwards.
func (st *syncTape) alloc(size int) ([]byte, bool) {
181+
182+
// end is the cursor after this reservation; the fragment is [end-size, end).
end := atomic.AddInt64(&st.tPtr, int64(size))
183+
if end > int64(len(st.tape)) {
184+
// Clamp the cursor back to the tape length so that repeated failed calls
// cannot keep increasing tPtr until it overflows.
185+
atomic.StoreInt64(&st.tPtr, int64(len(st.tape)))
186+
return nil, false
187+
}
188+
189+
return st.tape[end-int64(size) : end], true
190+
}
191+
192+
// globalFactory holds the *syncTape currently used by the package-level New.
// It is empty until New stores its first tape.
var globalFactory atomic.Value
193+
194+
// New returns an immutable string holding a copy of the mutable bytes in
// content.
//
// Content longer than DefaultFactoryPoolSize/2 bytes is converted with a plain
// string(content). Smaller content is copied into a fragment of the syncTape
// stored in globalFactory and the returned string points at that fragment.
// When no tape has been stored yet, or the current one cannot satisfy the
// request, a new tape is created, stored in globalFactory and used instead.
//
// NOTE(review): the replacement of the tape is a plain Load followed by Store,
// so goroutines that find the tape exhausted at the same time each create
// their own tape and only the last one stored serves later calls.
195+
func New(content []byte) string {
196+
197+
if len(content)*2 > DefaultFactoryPoolSize {
198+
return string(content)
199+
}
200+
201+
// Fast path: carve the fragment out of the currently published tape.
gf := globalFactory.Load()
202+
if gf != nil {
203+
tape := gf.(*syncTape)
204+
frag, ok := tape.alloc(len(content))
205+
if ok {
206+
copy(frag, content)
207+
return bytes2String(frag)
208+
}
209+
}
210+
211+
// Slow path: publish a fresh tape. The ok result is ignored because
// len(content) is at most half of the tape size at this point, so an
// allocation from an empty tape fits.
tape := &syncTape{}
212+
frag, _ := tape.alloc(len(content))
213+
globalFactory.Store(tape)
214+
copy(frag, content)
215+
return bytes2String(frag)
216+
}

src/strings/builder_test.go

+126
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ package strings_test
66

77
import (
88
"bytes"
9+
"math/rand"
10+
"strconv"
911
. "strings"
1012
"testing"
1113
)
@@ -364,3 +366,127 @@ func BenchmarkBuildString_ByteBuffer(b *testing.B) {
364366
}
365367
})
366368
}
369+
370+
// randomString returns a string of l bytes, each picked at random from the
// ASCII letters and digits using the shared math/rand source. It returns the
// empty string when l is zero or negative.
func randomString(l int) string {
	const material = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz1234567890"
	if l <= 0 {
		return ""
	}
	// The final length is known, so allocate once instead of growing the
	// buffer through repeated append calls.
	buff := make([]byte, l)
	for i := range buff {
		buff[i] = material[rand.Intn(len(material))]
	}
	return string(buff)
}
378+
379+
// TestFactoryNewString checks that Factory.New returns strings equal to their
// input, both for content far larger than the pool and for many small
// contents in a row.
func TestFactoryNewString(t *testing.T) {
380+
381+
factory := NewFactoryWithPoolSize(1024)
382+
383+
// 4096 bytes against a pool requested at 1024 bytes: expected to exercise
// the branch of New that bypasses the pool.
largeStr := randomString(4096)
384+
largeStr0 := factory.New([]byte(largeStr))
385+
if largeStr != largeStr0 {
386+
t.Error("equality assuming hasn't been satisfied")
387+
}
388+
389+
// 1024 strings of 64 bytes add up to far more than the requested pool
// size, so the pool has to be refilled repeatedly during this loop.
for count := 0; count < 1024; count++ {
390+
str := randomString(64)
391+
str0 := factory.New([]byte(str))
392+
if str != str0 {
393+
t.Error("equality assuming hasn't been satisfied")
394+
}
395+
}
396+
}
397+
398+
// TestNewString checks that the package-level New returns strings equal to
// their input, first for a single large content and then from 512 parallel
// subtests that each convert 1024 small contents.
func TestNewString(t *testing.T) {
399+
400+
// 4096 bytes is more than half of DefaultFactoryPoolSize, so this goes
// through the plain string(content) branch of New.
largeStr := randomString(4096)
401+
largeStr0 := New([]byte(largeStr))
402+
if largeStr != largeStr0 {
403+
t.Error("equality assuming hasn't been satisfied")
404+
}
405+
406+
for testCount := 0; testCount < 512; testCount++ {
407+
t.Run(strconv.Itoa(testCount), func(t *testing.T) {
408+
// Run the subtests concurrently so that New is called from several
// goroutines at once.
t.Parallel()
409+
410+
for count := 0; count < 1024; count++ {
411+
str := randomString(8)
412+
str0 := New([]byte(str))
413+
if str != str0 {
414+
t.Error("equality assuming hasn't been satisfied")
415+
}
416+
}
417+
})
418+
}
419+
}
420+
421+
// BenchmarkFactoryNewString compares a plain string(content) conversion
// ("oldway") with Factory.New ("factory"). Each benchmark iteration performs
// 100 conversions of the same 12-byte content.
func BenchmarkFactoryNewString(b *testing.B) {
422+
423+
content := []byte("hello world!")
424+
// consumer is a no-op that receives each converted string.
consumer := func(str string) {}
425+
426+
b.Run("oldway", func(b *testing.B) {
427+
for benchCount := 0; benchCount < b.N; benchCount++ {
428+
for i := 0; i < 100; i++ {
429+
str := string(content)
430+
consumer(str)
431+
}
432+
}
433+
})
434+
435+
b.Run("factory", func(b *testing.B) {
436+
// One Factory is shared by all iterations of this sub-benchmark.
factory := NewFactory()
437+
for benchCount := 0; benchCount < b.N; benchCount++ {
438+
for i := 0; i < 100; i++ {
439+
str := factory.New(content)
440+
consumer(str)
441+
}
442+
}
443+
})
444+
}
445+
446+
// BenchmarkNewString compares a plain string(content) conversion ("oldway-*")
// with the package-level New ("factory-*"), both from a single goroutine
// ("*-sync") and from parallel goroutines via RunParallel ("*-async"). Each
// iteration performs 100 conversions of the same 12-byte content.
func BenchmarkNewString(b *testing.B) {
447+
448+
content := []byte("hello world!")
449+
// consumer is a no-op that receives each converted string.
consumer := func(str string) {}
450+
451+
b.Run("oldway-sync", func(b *testing.B) {
452+
for benchCount := 0; benchCount < b.N; benchCount++ {
453+
for i := 0; i < 100; i++ {
454+
str := string(content)
455+
consumer(str)
456+
}
457+
}
458+
})
459+
460+
b.Run("factory-sync", func(b *testing.B) {
461+
for benchCount := 0; benchCount < b.N; benchCount++ {
462+
for i := 0; i < 100; i++ {
463+
str := New(content)
464+
consumer(str)
465+
}
466+
}
467+
})
468+
469+
b.Run("oldway-async", func(b *testing.B) {
470+
// Parallelism is set before RunParallel starts its goroutines.
b.SetParallelism(2)
471+
b.RunParallel(func(pb *testing.PB) {
472+
for pb.Next() {
473+
for i := 0; i < 100; i++ {
474+
str := string(content)
475+
consumer(str)
476+
}
477+
}
478+
})
479+
})
480+
481+
b.Run("factory-async", func(b *testing.B) {
482+
// Same parallel setup as "oldway-async", but going through New.
b.SetParallelism(2)
483+
b.RunParallel(func(pb *testing.PB) {
484+
for pb.Next() {
485+
for i := 0; i < 100; i++ {
486+
str := New(content)
487+
consumer(str)
488+
}
489+
}
490+
})
491+
})
492+
}

0 commit comments

Comments
 (0)