Skip to content

Commit d6ef98b

Browse files
committed
runtime: optimize bulkBarrierPreWrite with allocheaders
Currently bulkBarrierPreWrite follows a fairly slow path wherein it calls typePointersOf, which ends up calling into fastForward. This does some fairly heavy computation to move the iterator forward without any assumptions about where it lands at all. It needs to be completely general to support splitting at arbitrary boundaries, for example for scanning oblets. This means that copying objects during the GC mark phase is fairly expensive, and is a regression from before allocheaders. However, in almost all cases bulkBarrierPreWrite and bulkBarrierPreWriteSrcOnly have perfect type information. We can do a lot better in these cases because we're starting on a type-size boundary, which is exactly what the iterator is built around. This change adds the typePointersOfType method which produces a typePointers iterator from a pointer and a type. This change significantly improves the performance of these bulk write barriers, eliminating some performance regressions that were noticed on the perf dashboard. There are still just a couple cases where we have to use the more general typePointersOf calls, but they're fairly rare; most bulk barriers have perfect type information. This change is tested by the GCInfo tests in the runtime and the GCBits tests in the reflect package via an additional check in getgcmask. Results for tile38 before and after allocheaders. There was previously a regression in the p90, now it's gone. Also, the overall win has been boosted slightly. 
tile38 $ benchstat noallocheaders.results allocheaders.results name old time/op new time/op delta Tile38QueryLoad 481µs ± 1% 468µs ± 1% -2.71% (p=0.000 n=10+10) name old average-RSS-bytes new average-RSS-bytes delta Tile38QueryLoad 6.32GB ± 1% 6.23GB ± 0% -1.38% (p=0.000 n=9+8) name old peak-RSS-bytes new peak-RSS-bytes delta Tile38QueryLoad 6.49GB ± 1% 6.40GB ± 1% -1.38% (p=0.002 n=10+10) name old peak-VM-bytes new peak-VM-bytes delta Tile38QueryLoad 7.72GB ± 1% 7.64GB ± 1% -1.07% (p=0.007 n=10+10) name old p50-latency-ns new p50-latency-ns delta Tile38QueryLoad 212k ± 1% 205k ± 0% -3.02% (p=0.000 n=10+9) name old p90-latency-ns new p90-latency-ns delta Tile38QueryLoad 622k ± 1% 616k ± 1% -1.03% (p=0.005 n=10+10) name old p99-latency-ns new p99-latency-ns delta Tile38QueryLoad 4.55M ± 2% 4.39M ± 2% -3.51% (p=0.000 n=10+10) name old ops/s new ops/s delta Tile38QueryLoad 12.5k ± 1% 12.8k ± 1% +2.78% (p=0.000 n=10+10) Change-Id: I0a48f848eae8777d0fd6769c3a1fe449f8d9d0a6 Reviewed-on: https://go-review.googlesource.com/c/go/+/542219 Reviewed-by: Cherry Mui <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 17eb0a2 commit d6ef98b

File tree

5 files changed

+237
-26
lines changed

5 files changed

+237
-26
lines changed

src/reflect/value.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1723,7 +1723,7 @@ func (v Value) SetZero() {
17231723
case Slice:
17241724
*(*unsafeheader.Slice)(v.ptr) = unsafeheader.Slice{}
17251725
case Interface:
1726-
*(*[2]unsafe.Pointer)(v.ptr) = [2]unsafe.Pointer{}
1726+
*(*emptyInterface)(v.ptr) = emptyInterface{}
17271727
case Chan, Func, Map, Pointer, UnsafePointer:
17281728
*(*unsafe.Pointer)(v.ptr) = nil
17291729
case Array, Struct:

src/runtime/mbarrier.go

+35-9
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,10 @@ func typedmemmove(typ *abi.Type, dst, src unsafe.Pointer) {
154154
return
155155
}
156156
if writeBarrier.enabled && typ.PtrBytes != 0 {
157-
bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.PtrBytes)
157+
// This always copies a full value of type typ so it's safe
158+
// to pass typ along as an optimization. See the comment on
159+
// bulkBarrierPreWrite.
160+
bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.PtrBytes, typ)
158161
}
159162
// There's a race here: if some other goroutine can write to
160163
// src, it may change some pointer in src after we've
@@ -176,7 +179,10 @@ func typedmemmove(typ *abi.Type, dst, src unsafe.Pointer) {
176179
//go:nowritebarrierrec
177180
//go:nosplit
178181
func wbZero(typ *_type, dst unsafe.Pointer) {
179-
bulkBarrierPreWrite(uintptr(dst), 0, typ.PtrBytes)
182+
// This always copies a full value of type typ so it's safe
183+
// to pass typ along as an optimization. See the comment on
184+
// bulkBarrierPreWrite.
185+
bulkBarrierPreWrite(uintptr(dst), 0, typ.PtrBytes, typ)
180186
}
181187

182188
// wbMove performs the write barrier operations necessary before
@@ -186,7 +192,11 @@ func wbZero(typ *_type, dst unsafe.Pointer) {
186192
//go:nowritebarrierrec
187193
//go:nosplit
188194
func wbMove(typ *_type, dst, src unsafe.Pointer) {
189-
bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.PtrBytes)
195+
// This always copies a full value of type typ so it's safe to
196+
// pass a type here.
197+
//
198+
// See the comment on bulkBarrierPreWrite.
199+
bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.PtrBytes, typ)
190200
}
191201

192202
//go:linkname reflect_typedmemmove reflect.typedmemmove
@@ -223,7 +233,10 @@ func reflectlite_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
223233
//go:nosplit
224234
func reflectcallmove(typ *_type, dst, src unsafe.Pointer, size uintptr, regs *abi.RegArgs) {
225235
if writeBarrier.enabled && typ != nil && typ.PtrBytes != 0 && size >= goarch.PtrSize {
226-
bulkBarrierPreWrite(uintptr(dst), uintptr(src), size)
236+
// Pass nil for the type. dst does not point to a value of type typ,
237+
// but rather points into one, so applying the optimization is not
238+
// safe. See the comment on this function.
239+
bulkBarrierPreWrite(uintptr(dst), uintptr(src), size, nil)
227240
}
228241
memmove(dst, src, size)
229242

@@ -278,8 +291,11 @@ func typedslicecopy(typ *_type, dstPtr unsafe.Pointer, dstLen int, srcPtr unsafe
278291
// before calling typedslicecopy.
279292
size := uintptr(n) * typ.Size_
280293
if writeBarrier.enabled {
294+
// This always copies one or more full values of type typ so
295+
// it's safe to pass typ along as an optimization. See the comment on
296+
// bulkBarrierPreWrite.
281297
pwsize := size - typ.Size_ + typ.PtrBytes
282-
bulkBarrierPreWrite(uintptr(dstPtr), uintptr(srcPtr), pwsize)
298+
bulkBarrierPreWrite(uintptr(dstPtr), uintptr(srcPtr), pwsize, typ)
283299
}
284300
// See typedmemmove for a discussion of the race between the
285301
// barrier and memmove.
@@ -308,7 +324,10 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int {
308324
//go:nosplit
309325
func typedmemclr(typ *_type, ptr unsafe.Pointer) {
310326
if writeBarrier.enabled && typ.PtrBytes != 0 {
311-
bulkBarrierPreWrite(uintptr(ptr), 0, typ.PtrBytes)
327+
// This always clears a whole value of type typ, so it's
328+
// safe to pass a type here and apply the optimization.
329+
// See the comment on bulkBarrierPreWrite.
330+
bulkBarrierPreWrite(uintptr(ptr), 0, typ.PtrBytes, typ)
312331
}
313332
memclrNoHeapPointers(ptr, typ.Size_)
314333
}
@@ -321,7 +340,11 @@ func reflect_typedmemclr(typ *_type, ptr unsafe.Pointer) {
321340
//go:linkname reflect_typedmemclrpartial reflect.typedmemclrpartial
322341
func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintptr) {
323342
if writeBarrier.enabled && typ.PtrBytes != 0 {
324-
bulkBarrierPreWrite(uintptr(ptr), 0, size)
343+
// Pass nil for the type. ptr does not point to a value of type typ,
344+
// but rather points into one so it's not safe to apply the optimization.
345+
// See the comment on this function in the reflect package and the
346+
// comment on bulkBarrierPreWrite.
347+
bulkBarrierPreWrite(uintptr(ptr), 0, size, nil)
325348
}
326349
memclrNoHeapPointers(ptr, size)
327350
}
@@ -330,7 +353,9 @@ func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintpt
330353
func reflect_typedarrayclear(typ *_type, ptr unsafe.Pointer, len int) {
331354
size := typ.Size_ * uintptr(len)
332355
if writeBarrier.enabled && typ.PtrBytes != 0 {
333-
bulkBarrierPreWrite(uintptr(ptr), 0, size)
356+
// This always clears whole elements of an array, so it's
357+
// safe to pass a type here. See the comment on bulkBarrierPreWrite.
358+
bulkBarrierPreWrite(uintptr(ptr), 0, size, typ)
334359
}
335360
memclrNoHeapPointers(ptr, size)
336361
}
@@ -342,6 +367,7 @@ func reflect_typedarrayclear(typ *_type, ptr unsafe.Pointer, len int) {
342367
//
343368
//go:nosplit
344369
func memclrHasPointers(ptr unsafe.Pointer, n uintptr) {
345-
bulkBarrierPreWrite(uintptr(ptr), 0, n)
370+
// Pass nil for the type since we don't have one here anyway.
371+
bulkBarrierPreWrite(uintptr(ptr), 0, n, nil)
346372
memclrNoHeapPointers(ptr, n)
347373
}

0 commit comments

Comments
 (0)