Skip to content

Commit 6605686

Browse files
derekparker authored and gopherbot committed
cmd/compile: new inline heuristic for struct compares
This CL changes the heuristic used to determine whether we can inline a struct equality check or whether we must generate a function and call that function for equality. The old method was to count struct fields, but this can lead to poor inlining decisions. We should instead determine the cost of the equality check and use that cost to decide whether to inline or to generate a function. The new benchmark provided in this CL returns the following when compared against tip: ``` name old time/op new time/op delta EqStruct-32 2.46ns ± 4% 0.25ns ±10% -89.72% (p=0.000 n=39+39) ``` Fixes #38494 Change-Id: Ie06b80a2b2a03a3fd0978bcaf7715f9afb66e0ab GitHub-Last-Rev: e9a18d9 GitHub-Pull-Request: #53326 Reviewed-on: https://go-review.googlesource.com/c/go/+/411674 Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Keith Randall <[email protected]> Run-TryBot: Keith Randall <[email protected]> Auto-Submit: Keith Randall <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Heschi Kreinick <[email protected]>
1 parent 19a4804 commit 6605686

File tree

5 files changed

+332
-10
lines changed

5 files changed

+332
-10
lines changed

src/cmd/compile/internal/compare/compare.go

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,93 @@ func EqCanPanic(t *types.Type) bool {
7979
}
8080
}
8181

82+
// EqStructCost returns the cost of an equality comparison of two structs.
83+
//
84+
// The cost is determined using an algorithm which takes into consideration
85+
// the size of the registers in the current architecture and the size of the
86+
// memory-only fields in the struct.
87+
func EqStructCost(t *types.Type) int64 {
88+
cost := int64(0)
89+
90+
for i, fields := 0, t.FieldSlice(); i < len(fields); {
91+
f := fields[i]
92+
93+
// Skip blank-named fields.
94+
if f.Sym.IsBlank() {
95+
i++
96+
continue
97+
}
98+
99+
n, _, next := eqStructFieldCost(t, i)
100+
101+
cost += n
102+
i = next
103+
}
104+
105+
return cost
106+
}
107+
108+
// eqStructFieldCost returns the cost of an equality comparison of two struct fields.
109+
// t is the parent struct type, and i is the index of the field in the parent struct type.
110+
// eqStructFieldCost may compute the cost of several adjacent fields at once. It returns
111+
// the cost, the size of the set of fields it computed the cost for (in bytes), and the
112+
// index of the first field not part of the set of fields for which the cost
113+
// has already been calculated.
114+
func eqStructFieldCost(t *types.Type, i int) (int64, int64, int) {
115+
var (
116+
cost = int64(0)
117+
regSize = int64(types.RegSize)
118+
119+
size int64
120+
next int
121+
)
122+
123+
if base.Ctxt.Arch.CanMergeLoads {
124+
// If we can merge adjacent loads then we can calculate the cost of the
125+
// comparison using the size of the memory run and the size of the registers.
126+
size, next = Memrun(t, i)
127+
cost = size / regSize
128+
if size%regSize != 0 {
129+
cost++
130+
}
131+
return cost, size, next
132+
}
133+
134+
// If we cannot merge adjacent loads then we have to use the size of the
135+
// field and take into account the type to determine how many loads and compares
136+
// are needed.
137+
ft := t.Field(i).Type
138+
size = ft.Size()
139+
next = i + 1
140+
141+
return calculateCostForType(ft), size, next
142+
}
143+
144+
func calculateCostForType(t *types.Type) int64 {
145+
var cost int64
146+
switch t.Kind() {
147+
case types.TSTRUCT:
148+
return EqStructCost(t)
149+
case types.TSLICE:
150+
// Slices are not comparable.
151+
base.Fatalf("eqStructFieldCost: unexpected slice type")
152+
case types.TARRAY:
153+
elemCost := calculateCostForType(t.Elem())
154+
cost = t.NumElem() * elemCost
155+
case types.TSTRING, types.TINTER, types.TCOMPLEX64, types.TCOMPLEX128:
156+
cost = 2
157+
case types.TINT64, types.TUINT64:
158+
cost = 8 / int64(types.RegSize)
159+
default:
160+
cost = 1
161+
}
162+
return cost
163+
}
164+
82165
// EqStruct compares two structs np and nq for equality.
83166
// It works by building a list of boolean conditions to satisfy.
84167
// Conditions must be evaluated in the returned order and
85-
// properly short circuited by the caller.
168+
// properly short-circuited by the caller.
86169
func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
87170
// The conditions are a list-of-lists. Conditions are reorderable
88171
// within each inner list. The outer lists must be evaluated in order.
@@ -128,18 +211,15 @@ func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
128211
continue
129212
}
130213

131-
// Find maximal length run of memory-only fields.
132-
size, next := Memrun(t, i)
133-
134-
// TODO(rsc): All the calls to newname are wrong for
135-
// cross-package unexported fields.
136-
if s := fields[i:next]; len(s) <= 2 {
137-
// Two or fewer fields: use plain field equality.
214+
cost, size, next := eqStructFieldCost(t, i)
215+
if cost <= 4 {
216+
// Cost of 4 or less: use plain field equality.
217+
s := fields[i:next]
138218
for _, f := range s {
139219
and(eqfield(np, nq, ir.OEQ, f.Sym))
140220
}
141221
} else {
142-
// More than two fields: use memequal.
222+
// Higher cost: use memequal.
143223
cc := eqmem(np, nq, f.Sym, size)
144224
and(cc)
145225
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
package compare
2+
3+
import (
4+
"cmd/compile/internal/base"
5+
"cmd/compile/internal/typecheck"
6+
"cmd/compile/internal/types"
7+
"cmd/internal/obj"
8+
"cmd/internal/src"
9+
"cmd/internal/sys"
10+
"testing"
11+
)
12+
13+
// typefn builds and returns a *types.Type. Each test case supplies one so the
// struct under test can be constructed on demand.
type typefn func() *types.Type
14+
15+
func init() {
16+
// These are the few constants that need to be initialized in order to use
17+
// the types package without using the typecheck package by calling
18+
// typecheck.InitUniverse() (the normal way to initialize the types package).
19+
types.PtrSize = 8
20+
types.RegSize = 8
21+
types.MaxWidth = 1 << 50
22+
typecheck.InitUniverse()
23+
base.Ctxt = &obj.Link{Arch: &obj.LinkArch{Arch: &sys.Arch{Alignment: 1, CanMergeLoads: true}}}
24+
}
25+
26+
func TestEqStructCost(t *testing.T) {
27+
newByteField := func(parent *types.Type, offset int64) *types.Field {
28+
f := types.NewField(src.XPos{}, parent.Sym(), types.ByteType)
29+
f.Offset = offset
30+
return f
31+
}
32+
newArrayField := func(parent *types.Type, offset int64, len int64, kind types.Kind) *types.Field {
33+
f := types.NewField(src.XPos{}, parent.Sym(), types.NewArray(types.Types[kind], len))
34+
// Call Type.Size here to force the size calculation to be done. If not done here the size returned later is incorrect.
35+
f.Type.Size()
36+
f.Offset = offset
37+
return f
38+
}
39+
newField := func(parent *types.Type, offset int64, kind types.Kind) *types.Field {
40+
f := types.NewField(src.XPos{}, parent.Sym(), types.Types[kind])
41+
f.Offset = offset
42+
return f
43+
}
44+
tt := []struct {
45+
name string
46+
cost int64
47+
nonMergeLoadCost int64
48+
tfn typefn
49+
}{
50+
{"struct without fields", 0, 0,
51+
func() *types.Type {
52+
return types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
53+
}},
54+
{"struct with 1 byte field", 1, 1,
55+
func() *types.Type {
56+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
57+
fields := []*types.Field{
58+
newByteField(parent, 0),
59+
}
60+
parent.SetFields(fields)
61+
return parent
62+
},
63+
},
64+
{"struct with 8 byte fields", 1, 8,
65+
func() *types.Type {
66+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
67+
fields := make([]*types.Field, 8)
68+
for i := range fields {
69+
fields[i] = newByteField(parent, int64(i))
70+
}
71+
parent.SetFields(fields)
72+
return parent
73+
},
74+
},
75+
{"struct with 16 byte fields", 2, 16,
76+
func() *types.Type {
77+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
78+
fields := make([]*types.Field, 16)
79+
for i := range fields {
80+
fields[i] = newByteField(parent, int64(i))
81+
}
82+
parent.SetFields(fields)
83+
return parent
84+
},
85+
},
86+
{"struct with 32 byte fields", 4, 32,
87+
func() *types.Type {
88+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
89+
fields := make([]*types.Field, 32)
90+
for i := range fields {
91+
fields[i] = newByteField(parent, int64(i))
92+
}
93+
parent.SetFields(fields)
94+
return parent
95+
},
96+
},
97+
{"struct with 2 int32 fields", 1, 2,
98+
func() *types.Type {
99+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
100+
fields := make([]*types.Field, 2)
101+
for i := range fields {
102+
fields[i] = newField(parent, int64(i*4), types.TINT32)
103+
}
104+
parent.SetFields(fields)
105+
return parent
106+
},
107+
},
108+
{"struct with 2 int32 fields and 1 int64", 2, 3,
109+
func() *types.Type {
110+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
111+
fields := make([]*types.Field, 3)
112+
fields[0] = newField(parent, int64(0), types.TINT32)
113+
fields[1] = newField(parent, int64(4), types.TINT32)
114+
fields[2] = newField(parent, int64(8), types.TINT64)
115+
parent.SetFields(fields)
116+
return parent
117+
},
118+
},
119+
{"struct with 1 int field and 1 string", 3, 3,
120+
func() *types.Type {
121+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
122+
fields := make([]*types.Field, 2)
123+
fields[0] = newField(parent, int64(0), types.TINT64)
124+
fields[1] = newField(parent, int64(8), types.TSTRING)
125+
parent.SetFields(fields)
126+
return parent
127+
},
128+
},
129+
{"struct with 2 strings", 4, 4,
130+
func() *types.Type {
131+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
132+
fields := make([]*types.Field, 2)
133+
fields[0] = newField(parent, int64(0), types.TSTRING)
134+
fields[1] = newField(parent, int64(8), types.TSTRING)
135+
parent.SetFields(fields)
136+
return parent
137+
},
138+
},
139+
{"struct with 1 large byte array field", 26, 101,
140+
func() *types.Type {
141+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
142+
fields := []*types.Field{
143+
newArrayField(parent, 0, 101, types.TUINT16),
144+
}
145+
parent.SetFields(fields)
146+
return parent
147+
},
148+
},
149+
{"struct with string array field", 4, 4,
150+
func() *types.Type {
151+
parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
152+
fields := []*types.Field{
153+
newArrayField(parent, 0, 2, types.TSTRING),
154+
}
155+
parent.SetFields(fields)
156+
return parent
157+
},
158+
},
159+
}
160+
161+
for _, tc := range tt {
162+
t.Run(tc.name, func(t *testing.T) {
163+
want := tc.cost
164+
base.Ctxt.Arch.CanMergeLoads = true
165+
actual := EqStructCost(tc.tfn())
166+
if actual != want {
167+
t.Errorf("CanMergeLoads=true EqStructCost(%v) = %d, want %d", tc.tfn, actual, want)
168+
}
169+
170+
base.Ctxt.Arch.CanMergeLoads = false
171+
want = tc.nonMergeLoadCost
172+
actual = EqStructCost(tc.tfn())
173+
if actual != want {
174+
t.Errorf("CanMergeLoads=false EqStructCost(%v) = %d, want %d", tc.tfn, actual, want)
175+
}
176+
})
177+
}
178+
}

src/cmd/compile/internal/reflectdata/alg_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,22 @@ func BenchmarkEqArrayOfFloats1024(b *testing.B) {
7474
_ = a == c
7575
}
7676
}
77+
78+
// size is the length of the byte array wrapped by T1.
const size = 16

// T1 is a struct with a single array field of size bytes.
type T1 struct {
	a [size]byte
}

// BenchmarkEqStruct runs b.N equality comparisons of two T1 values whose
// arrays differ. The comparison result feeds a branch so that it is used.
func BenchmarkEqStruct(b *testing.B) {
	var lhs, rhs T1
	lhs.a = [size]byte{1, 2, 3, 4, 5, 6, 7, 8}
	rhs.a = [size]byte{2, 3, 4, 5, 6, 7, 8, 9}

	for iter := 0; iter < b.N; iter++ {
		if equal := lhs == rhs; equal {
			println("hello")
		}
	}
}

src/cmd/compile/internal/walk/compare.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ func walkCompare(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
167167
// We can compare several elements at once with 2/4/8 byte integer compares
168168
inline = t.NumElem() <= 1 || (types.IsSimple[t.Elem().Kind()] && (t.NumElem() <= 4 || t.Elem().Size()*t.NumElem() <= maxcmpsize))
169169
case types.TSTRUCT:
170-
inline = t.NumComponents(types.IgnoreBlankFields) <= 4
170+
inline = compare.EqStructCost(t) <= 4
171171
}
172172

173173
cmpl := n.X

test/codegen/comparisons.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,51 @@ func CompareArray6(a, b unsafe.Pointer) bool {
8484
return *((*[4]byte)(a)) != *((*[4]byte)(b))
8585
}
8686

87+
// Check that some structs generate 2/4/8 byte compares.
88+
89+
// T1 is a struct whose only field is an 8-byte array.
type T1 struct {
90+
a [8]byte
91+
}
92+
93+
// CompareStruct1 reports whether s1 and s2 are equal. The amd64 checks inside
// the body expect a CMPQ on a stack slot and no CALL instruction.
func CompareStruct1(s1, s2 T1) bool {
94+
// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
95+
// amd64:-`CALL`
96+
return s1 == s2
97+
}
98+
99+
// T2 is a struct whose only field is a 16-byte array.
type T2 struct {
100+
a [16]byte
101+
}
102+
103+
// CompareStruct2 reports whether s1 and s2 are equal. The amd64 checks inside
// the body expect a CMPQ on a stack slot and no CALL instruction.
func CompareStruct2(s1, s2 T2) bool {
104+
// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
105+
// amd64:-`CALL`
106+
return s1 == s2
107+
}
108+
109+
// Assert that a memequal call is still generated when
110+
// inlining would increase binary size too much.
111+
112+
// T3 is a struct whose only field is a 24-byte array.
type T3 struct {
113+
a [24]byte
114+
}
115+
116+
// CompareStruct3 reports whether s1 and s2 are equal. The amd64 checks inside
// the body expect a CALL instruction and no CMPQ on a stack slot.
func CompareStruct3(s1, s2 T3) bool {
117+
// amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
118+
// amd64:`CALL`
119+
return s1 == s2
120+
}
121+
122+
// T4 is a struct whose only field is a 32-byte array.
type T4 struct {
123+
a [32]byte
124+
}
125+
126+
// CompareStruct4 reports whether s1 and s2 are equal. The amd64 checks inside
// the body expect a CALL instruction and no CMPQ on a stack slot.
func CompareStruct4(s1, s2 T4) bool {
127+
// amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
128+
// amd64:`CALL`
129+
return s1 == s2
130+
}
131+
87132
// -------------- //
88133
// Ordering //
89134
// -------------- //

0 commit comments

Comments
 (0)