Skip to content

Commit 775d0f3

Browse files
authored
[GVN] Handle scalable vectors with the same size in VNCoercion (#123984)
This allows us to forward to a load even if the types do not match (nxv4i32 vs nxv2i64, for example). Scalable types are allowed in canCoerceMustAliasedValueToLoad so long as the total size in bits (minimum element count × scalar size) is the same, and some follow-on code is adjusted to make sure it handles scalable sizes correctly. Methods like analyzeLoadFromClobberingWrite and analyzeLoadFromClobberingStore still do nothing for scalable vectors, as non-zero offsets and mismatching types are not supported for them.
1 parent 4cf1fe2 commit 775d0f3

File tree

2 files changed

+35
-19
lines changed

2 files changed

+35
-19
lines changed

llvm/lib/Transforms/Utils/VNCoercion.cpp

+23-7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
2121
if (StoredTy == LoadTy)
2222
return true;
2323

24+
if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
25+
DL.getTypeSizeInBits(StoredTy) == DL.getTypeSizeInBits(LoadTy))
26+
return true;
27+
2428
// If the loaded/stored value is a first class array/struct, or scalable type,
2529
// don't try to transform them. We need to be able to bitcast to integer.
2630
if (isFirstClassAggregateOrScalableType(LoadTy) ||
@@ -83,8 +87,8 @@ Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
8387
// If this is already the right type, just return it.
8488
Type *StoredValTy = StoredVal->getType();
8589

86-
uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
87-
uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();
90+
TypeSize StoredValSize = DL.getTypeSizeInBits(StoredValTy);
91+
TypeSize LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
8892

8993
// If the store and reload are the same size, we can always reuse it.
9094
if (StoredValSize == LoadedValSize) {
@@ -118,7 +122,8 @@ Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
118122
// If the loaded value is smaller than the available value, then we can
119123
// extract out a piece from it. If the available value is too small, then we
120124
// can't do anything.
121-
assert(StoredValSize >= LoadedValSize &&
125+
assert(!StoredValSize.isScalable() &&
126+
TypeSize::isKnownGE(StoredValSize, LoadedValSize) &&
122127
"canCoerceMustAliasedValueToLoad fail");
123128

124129
// Convert source pointers to integers, which can be manipulated.
@@ -303,6 +308,13 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
303308
return SrcVal;
304309
}
305310

311+
// Return scalable values directly to avoid needing to bitcast to integer
312+
// types, as we do not support non-zero Offsets.
313+
if (isa<ScalableVectorType>(LoadTy)) {
314+
assert(Offset == 0 && "Expected a zero offset for scalable types");
315+
return SrcVal;
316+
}
317+
306318
uint64_t StoreSize =
307319
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
308320
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
@@ -333,11 +345,15 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
333345

334346
Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
335347
Instruction *InsertPt, const DataLayout &DL) {
336-
337348
#ifndef NDEBUG
338-
unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
339-
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
340-
assert(Offset + LoadSize <= SrcValSize);
349+
TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
350+
TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
351+
assert(SrcValSize.isScalable() == LoadSize.isScalable());
352+
assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
353+
"Expected Offset + LoadSize <= SrcValSize");
354+
assert(
355+
(!SrcValSize.isScalable() || (Offset == 0 && LoadSize == SrcValSize)) &&
356+
"Expected scalable type sizes to match");
341357
#endif
342358
IRBuilder<> Builder(InsertPt);
343359
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);

llvm/test/Transforms/GVN/vscale.ll

+12-12
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ if.else:
393393
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
394394
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
395395
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
396-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
396+
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 16 x i8>
397397
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
398398
;
399399
store <vscale x 4 x i32> %x, ptr %p
@@ -404,7 +404,7 @@ define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x
404404
define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
405405
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
406406
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
407-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
407+
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 4 x float>
408408
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
409409
;
410410
store <vscale x 4 x i32> %x, ptr %p
@@ -415,7 +415,7 @@ define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale
415415
define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
416416
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
417417
; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
418-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
418+
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 16 x i8> [[X]] to <vscale x 4 x float>
419419
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
420420
;
421421
store <vscale x 16 x i8> %x, ptr %p
@@ -426,7 +426,7 @@ define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale
426426
define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
427427
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
428428
; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
429-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
429+
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x float> [[X]] to <vscale x 4 x i32>
430430
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
431431
;
432432
store <vscale x 4 x float> %x, ptr %p
@@ -496,7 +496,8 @@ define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <
496496
define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
497497
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
498498
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
499-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
499+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 2 x i64>
500+
; CHECK-NEXT: [[LOAD:%.*]] = inttoptr <vscale x 2 x i64> [[TMP1]] to <vscale x 2 x ptr>
500501
; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
501502
;
502503
store <vscale x 4 x i32> %x, ptr %p
@@ -507,7 +508,7 @@ define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x
507508
define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
508509
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
509510
; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
510-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
511+
; CHECK-NEXT: [[LOAD:%.*]] = ptrtoint <vscale x 2 x ptr> [[X]] to <vscale x 2 x i64>
511512
; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
512513
;
513514
store <vscale x 2 x ptr> %x, ptr %p
@@ -540,8 +541,7 @@ define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i3
540541
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
541542
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
542543
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
543-
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
544-
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
544+
; CHECK-NEXT: ret <vscale x 16 x i8> bitcast (<vscale x 4 x i32> splat (i32 4) to <vscale x 16 x i8>)
545545
;
546546
store <vscale x 4 x i32> splat (i32 4), ptr %p
547547
%load = load <vscale x 16 x i8>, ptr %p
@@ -590,13 +590,13 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
590590
; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
591591
; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
592592
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
593-
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
593+
; CHECK-NEXT: [[DOTUNPACK:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT]] to <vscale x 16 x i8>
594594
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
595-
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
595+
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT2]] to <vscale x 16 x i8>
596596
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
597-
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
597+
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT4]] to <vscale x 16 x i8>
598598
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
599-
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
599+
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT6]] to <vscale x 16 x i8>
600600
; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
601601
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]])
602602
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]

0 commit comments

Comments
 (0)