@@ -284,7 +284,7 @@ defm SHRX64: ShiftX<"shrx", Xi64>, XD;
284
284
defm SHLX32: ShiftX<"shlx", Xi32>, PD;
285
285
defm SHLX64: ShiftX<"shlx", Xi64>, PD;
286
286
287
- let Predicates = [HasBMI2] in {
287
+ multiclass RORX_Pats {
288
288
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
289
289
let AddedComplexity = 10 in {
290
290
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
@@ -307,7 +307,9 @@ let Predicates = [HasBMI2] in {
307
307
(RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
308
308
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
309
309
(RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
310
+ }
310
311
312
+ multiclass ShiftX_Pats<SDNode op> {
311
313
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
312
314
// immediate shift, i.e. the following code is considered better
313
315
//
@@ -322,34 +324,13 @@ let Predicates = [HasBMI2] in {
322
324
// ... %edi, ...
323
325
//
324
326
let AddedComplexity = 1 in {
325
- def : Pat<(sra GR32:$src1, GR8:$src2),
326
- (SARX32rr GR32:$src1,
327
- (INSERT_SUBREG
328
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
329
- def : Pat<(sra GR64:$src1, GR8:$src2),
330
- (SARX64rr GR64:$src1,
331
- (INSERT_SUBREG
332
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
333
-
334
- def : Pat<(srl GR32:$src1, GR8:$src2),
335
- (SHRX32rr GR32:$src1,
336
- (INSERT_SUBREG
337
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
338
- def : Pat<(srl GR64:$src1, GR8:$src2),
339
- (SHRX64rr GR64:$src1,
340
- (INSERT_SUBREG
341
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
342
-
343
- def : Pat<(shl GR32:$src1, GR8:$src2),
344
- (SHLX32rr GR32:$src1,
345
- (INSERT_SUBREG
346
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
347
- def : Pat<(shl GR64:$src1, GR8:$src2),
348
- (SHLX64rr GR64:$src1,
349
- (INSERT_SUBREG
350
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
327
+ def : Pat<(op GR32:$src1, GR8:$src2),
328
+ (!cast<Instruction>(NAME#"32rr") GR32:$src1,
329
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
330
+ def : Pat<(op GR64:$src1, GR8:$src2),
331
+ (!cast<Instruction>(NAME#"64rr") GR64:$src1,
332
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
351
333
}
352
-
353
334
// We prefer to use
354
335
// mov (%ecx), %esi
355
336
// shl $imm, $esi
@@ -360,30 +341,17 @@ let Predicates = [HasBMI2] in {
360
341
// shlx %al, (%ecx), %esi
361
342
//
362
343
// This priority is enforced by IsProfitableToFoldLoad.
363
- def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
364
- (SARX32rm addr:$src1,
365
- (INSERT_SUBREG
366
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
367
- def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
368
- (SARX64rm addr:$src1,
369
- (INSERT_SUBREG
370
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
371
-
372
- def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
373
- (SHRX32rm addr:$src1,
374
- (INSERT_SUBREG
375
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
376
- def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
377
- (SHRX64rm addr:$src1,
378
- (INSERT_SUBREG
379
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
380
-
381
- def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
382
- (SHLX32rm addr:$src1,
383
- (INSERT_SUBREG
384
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
385
- def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
386
- (SHLX64rm addr:$src1,
387
- (INSERT_SUBREG
388
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
344
+ def : Pat<(op (loadi32 addr:$src1), GR8:$src2),
345
+ (!cast<Instruction>(NAME#"32rm") addr:$src1,
346
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
347
+ def : Pat<(op (loadi64 addr:$src1), GR8:$src2),
348
+ (!cast<Instruction>(NAME#"64rm") addr:$src1,
349
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
350
+ }
351
+
352
+ let Predicates = [HasBMI2] in {
353
+ defm : RORX_Pats;
354
+ defm SARX : ShiftX_Pats<sra>;
355
+ defm SHRX : ShiftX_Pats<srl>;
356
+ defm SHLX : ShiftX_Pats<shl>;
389
357
}
0 commit comments