|
4 | 4 |
|
5 | 5 | package ssa
|
6 | 6 |
|
| 7 | +import ( |
| 8 | + "fmt" |
| 9 | + "sort" |
| 10 | +) |
| 11 | + |
7 | 12 | // layout orders basic blocks in f with the goal of minimizing control flow instructions.
|
8 | 13 | // After this phase returns, the order of f.Blocks matters and is the order
|
9 | 14 | // in which those blocks will appear in the assembly output.
|
func layout(f *Func) {
	// Delegate the actual ordering to the greedy Pettis-Hansen style
	// algorithm implemented below; it rewrites f.Blocks in place.
	f.Blocks = greedyBlockOrder(f)
}
|
13 | 18 |
|
14 | 19 | // Register allocation may use a different order which has constraints
|
@@ -183,3 +188,222 @@ blockloop:
|
183 | 188 | return order
|
184 | 189 | //f.Blocks = order
|
185 | 190 | }
|
| 191 | + |
// ----------------------------------------------------------------------------
// Greedy Basic Block Layout
//
// This is an adaptation of Pettis & Hansen's greedy algorithm for laying out
// basic blocks. See Profile Guided Code Positioning by Pettis & Hansen. The
// idea is to arrange hot blocks near each other. Initially every block belongs
// to its own chain; then, starting from the hottest edge, two proper chains
// are repeatedly merged iff the edge's dest is the first block of the dest
// chain and the edge's src is the last block of the src chain. Once all edges
// are processed, the chains are sorted by hotness and merge count to generate
// the final block order.
| 202 | + |
// chain is a linear sequence of blocks, laid out contiguously in the final
// block order.
type chain struct {
	id       int      // index of this chain in chainGraph.chains
	blocks   []*Block // blocks in layout order; always at least one entry
	priority int      // merge count
}
| 209 | + |
| 210 | +func (t *chain) first() *Block { |
| 211 | + return t.blocks[0] |
| 212 | +} |
| 213 | + |
| 214 | +func (t *chain) last() *Block { |
| 215 | + return t.blocks[len(t.blocks)-1] |
| 216 | +} |
| 217 | + |
// edge simply represents a CFG edge
type edge struct {
	src    *Block // branch source
	dst    *Block // branch destination
	weight int // frequency
}
| 224 | + |
// Edge weights assigned from static branch prediction: a predicted-taken
// (or unconditional) edge gets WeightTaken, the other side WeightNotTaken.
const (
	WeightTaken    = 100
	WeightNotTaken = 0
)
| 229 | + |
// String returns a readable form of the edge: src->dst(weight).
func (e *edge) String() string {
	return fmt.Sprintf("%v->%v(%d)", e.src, e.dst, e.weight)
}
| 233 | + |
// chainGraph holds all chains and the CFG edges between them while the
// greedy layout merges chains together.
type chainGraph struct {
	chainId int               // next chain id to hand out in newChain
	chains  []*chain          // indexed by chain id; slot is nil once merged away
	edges   []*edge           // every CFG edge considered for layout
	b2chain map[*Block]*chain // block -> chain currently containing it
}
| 240 | + |
| 241 | +func (g *chainGraph) newChain(block *Block) *chain { |
| 242 | + tr := &chain{g.chainId, []*Block{block}, 0 /*priority*/} |
| 243 | + g.b2chain[block] = tr |
| 244 | + g.chains = append(g.chains, tr) |
| 245 | + g.chainId++ |
| 246 | + return tr |
| 247 | +} |
| 248 | + |
// getChain returns the chain that currently contains b.
func (g *chainGraph) getChain(b *Block) *chain {
	return g.b2chain[b]
}
| 252 | + |
| 253 | +func (g *chainGraph) mergeChain(to, from *chain) { |
| 254 | + for _, block := range from.blocks { |
| 255 | + g.b2chain[block] = to |
| 256 | + } |
| 257 | + to.blocks = append(to.blocks, from.blocks...) |
| 258 | + to.priority++ // increment |
| 259 | + g.chains[from.id] = nil |
| 260 | +} |
| 261 | + |
| 262 | +func (g *chainGraph) print() { |
| 263 | + fmt.Printf("== Edges:\n") |
| 264 | + for _, edge := range g.edges { |
| 265 | + fmt.Printf("%v\n", edge) |
| 266 | + } |
| 267 | + fmt.Printf("== Chains:\n") |
| 268 | + for _, ch := range g.chains { |
| 269 | + if ch == nil { |
| 270 | + continue |
| 271 | + } |
| 272 | + fmt.Printf("id:%d priority:%d blocks:%v\n", ch.id, ch.priority, ch.blocks) |
| 273 | + } |
| 274 | +} |
| 275 | + |
| 276 | +func greedyBlockOrder(fn *Func) []*Block { |
| 277 | + graph := &chainGraph{0, []*chain{}, []*edge{}, make(map[*Block]*chain)} |
| 278 | + |
| 279 | + // Initially every block is in its own chain |
| 280 | + for _, block := range fn.Blocks { |
| 281 | + graph.newChain(block) |
| 282 | + |
| 283 | + if len(block.Succs) == 1 { |
| 284 | + graph.edges = append(graph.edges, &edge{block, block.Succs[0].b, WeightTaken}) |
| 285 | + } else if len(block.Succs) == 2 && block.Likely != BranchUnknown { |
| 286 | + // Static branch prediction is available |
| 287 | + taken := 0 |
| 288 | + if block.Likely == BranchUnlikely { |
| 289 | + taken = 1 |
| 290 | + } |
| 291 | + e1 := &edge{block, block.Succs[taken].b, WeightTaken} |
| 292 | + e2 := &edge{block, block.Succs[1-taken].b, WeightNotTaken} |
| 293 | + graph.edges = append(graph.edges, e1, e2) |
| 294 | + } else { |
| 295 | + // Block predication is unknown or there are more than 2 successors |
| 296 | + for _, succ := range block.Succs { |
| 297 | + e1 := &edge{block, succ.b, WeightTaken} |
| 298 | + graph.edges = append(graph.edges, e1) |
| 299 | + } |
| 300 | + } |
| 301 | + } |
| 302 | + |
| 303 | + // Sort edges by weight and move slow path to end |
| 304 | + j := len(graph.edges) - 1 |
| 305 | + for i, edge := range graph.edges { |
| 306 | + if edge.weight == 0 { |
| 307 | + if edge.dst.Kind == BlockExit && i < j { |
| 308 | + graph.edges[j], graph.edges[i] = graph.edges[i], graph.edges[j] |
| 309 | + j-- |
| 310 | + } |
| 311 | + } |
| 312 | + } |
| 313 | + sort.SliceStable(graph.edges, func(i, j int) bool { |
| 314 | + e1, e2 := graph.edges[i], graph.edges[j] |
| 315 | + // If the weights are the same, then keep the original order, this |
| 316 | + // ensures that adjacent edges are accessed sequentially, which has |
| 317 | + // a noticeable impact on performance |
| 318 | + return e1.weight >= e2.weight |
| 319 | + }) |
| 320 | + |
| 321 | + // Merge proper chains until no more chains can be merged |
| 322 | + for _, edge := range graph.edges { |
| 323 | + src := graph.getChain(edge.src) |
| 324 | + dst := graph.getChain(edge.dst) |
| 325 | + if src == dst { |
| 326 | + // Loop detected, "rotate" the loop from [..,header,body,latch] to |
| 327 | + // [..,body,latch,header] |
| 328 | + for idx, block := range src.blocks { |
| 329 | + if block == edge.dst && block.Kind != BlockPlain /*already rotated?*/ { |
| 330 | + c := append(src.blocks[0:idx], src.blocks[idx+1:]...) |
| 331 | + c = append(c, block) |
| 332 | + src.blocks = c |
| 333 | + break |
| 334 | + } |
| 335 | + } |
| 336 | + continue |
| 337 | + } |
| 338 | + if edge.dst == dst.first() && edge.src == src.last() { |
| 339 | + graph.mergeChain(src, dst) |
| 340 | + } |
| 341 | + } |
| 342 | + for i := 0; i < len(graph.chains); i++ { |
| 343 | + // Remove nil chains because they are merged |
| 344 | + if graph.chains[i] == nil { |
| 345 | + graph.chains = append(graph.chains[:i], graph.chains[i+1:]...) |
| 346 | + i-- |
| 347 | + } else if graph.chains[i].first() == fn.Entry { |
| 348 | + // Entry chain must be present at beginning |
| 349 | + graph.chains[0], graph.chains[i] = graph.chains[i], graph.chains[0] |
| 350 | + } |
| 351 | + } |
| 352 | + |
| 353 | + // Reorder chains based by hottness and priority |
| 354 | + before := make(map[*chain][]*chain) |
| 355 | + for _, edge := range graph.edges { |
| 356 | + // Compute the "before" precedence relation between chain, specifically, |
| 357 | + // the chain that is taken is arranged before the chain that is not taken. |
| 358 | + // This is because hardware prediction thought forward branch is less |
| 359 | + // frequently taken, while backedge is more frequently taken. |
| 360 | + if edge.weight == WeightNotTaken { |
| 361 | + src := graph.getChain(edge.src) |
| 362 | + dst := graph.getChain(edge.dst) |
| 363 | + before[src] = append(before[src], dst) |
| 364 | + } |
| 365 | + } |
| 366 | + // assert(graph.chains[0].first() == fn.Entry, "entry chain must be first") |
| 367 | + const idxSkipEntry = 1 // Entry chain is always first |
| 368 | + sort.SliceStable(graph.chains[idxSkipEntry:], func(i, j int) bool { |
| 369 | + c1, c2 := graph.chains[i+idxSkipEntry], graph.chains[j+idxSkipEntry] |
| 370 | + // Respect precedence relation |
| 371 | + for _, b := range before[c1] { |
| 372 | + if b == c2 { |
| 373 | + return true |
| 374 | + } |
| 375 | + } |
| 376 | + // Higher merge count is considered |
| 377 | + if c1.priority != c2.priority { |
| 378 | + return c1.priority > c2.priority |
| 379 | + } |
| 380 | + // Non-terminated chain is considered |
| 381 | + if s1, s2 := len(c1.last().Succs), len(c2.last().Succs); s1 != s2 { |
| 382 | + return s1 > s2 |
| 383 | + } |
| 384 | + // Keep original order if we can't decide |
| 385 | + return true |
| 386 | + }) |
| 387 | + |
| 388 | + // Generate final block order |
| 389 | + blockOrder := make([]*Block, 0) |
| 390 | + for _, chain := range graph.chains { |
| 391 | + blockOrder = append(blockOrder, chain.blocks...) |
| 392 | + } |
| 393 | + fn.laidout = true |
| 394 | + |
| 395 | + if fn.pass.debug > 2 { |
| 396 | + fmt.Printf("Block ordering(%v):\n", fn.Name) |
| 397 | + graph.print() |
| 398 | + } |
| 399 | + if len(blockOrder) != len(fn.Blocks) { |
| 400 | + graph.print() |
| 401 | + fn.Fatalf("miss blocks in final order") |
| 402 | + } |
| 403 | + if entryChain := graph.getChain(fn.Entry); entryChain != graph.chains[0] || |
| 404 | + entryChain.first() != fn.Entry { |
| 405 | + graph.print() |
| 406 | + fn.Fatalf("entry block is not first block") |
| 407 | + } |
| 408 | + return blockOrder |
| 409 | +} |
0 commit comments