@@ -13371,27 +13371,6 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
13371
13371
return SDValue();
13372
13372
}
13373
13373
13374
- // A special case is if the stride is exactly the width of one of the loads,
13375
- // in which case it's contiguous and can be combined into a regular vle
13376
- // without changing the element size
13377
- if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
13378
- ConstStride && !Reversed &&
13379
- ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
13380
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
13381
- BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
13382
- VT.getStoreSize(), Align);
13383
- // Can't do the combine if the load isn't naturally aligned with the element
13384
- // type
13385
- if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
13386
- DAG.getDataLayout(), VT, *MMO))
13387
- return SDValue();
13388
-
13389
- SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
13390
- for (SDValue Ld : N->ops())
13391
- DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
13392
- return WideLoad;
13393
- }
13394
-
13395
13374
// Get the widened scalar type, e.g. v4i8 -> i64
13396
13375
unsigned WideScalarBitWidth =
13397
13376
BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
@@ -13406,20 +13385,22 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
13406
13385
if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
13407
13386
return SDValue();
13408
13387
13409
- MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
13410
- SDValue VL =
13411
- getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
13412
- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13388
+ SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
13413
13389
SDValue IntID =
13414
- DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
13390
+ DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
13391
+ Subtarget.getXLenVT());
13415
13392
if (Reversed)
13416
13393
Stride = DAG.getNegative(Stride, DL, Stride->getValueType(0));
13394
+ SDValue AllOneMask =
13395
+ DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
13396
+ DAG.getConstant(1, DL, MVT::i1));
13397
+
13417
13398
SDValue Ops[] = {BaseLd->getChain(),
13418
13399
IntID,
13419
- DAG.getUNDEF(ContainerVT ),
13400
+ DAG.getUNDEF(WideVecVT ),
13420
13401
BasePtr,
13421
13402
Stride,
13422
- VL };
13403
+ AllOneMask };
13423
13404
13424
13405
uint64_t MemSize;
13425
13406
if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
@@ -13441,11 +13422,7 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
13441
13422
for (SDValue Ld : N->ops())
13442
13423
DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
13443
13424
13444
- // Note: Perform the bitcast before the convertFromScalableVector so we have
13445
- // balanced pairs of convertFromScalable/convertToScalable
13446
- SDValue Res = DAG.getBitcast(
13447
- TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
13448
- return convertFromScalableVector(VT, Res, DAG, Subtarget);
13425
+ return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
13449
13426
}
13450
13427
13451
13428
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
@@ -14184,6 +14161,25 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
14184
14161
// By default we do not combine any intrinsic.
14185
14162
default:
14186
14163
return SDValue();
14164
+ case Intrinsic::riscv_masked_strided_load: {
14165
+ MVT VT = N->getSimpleValueType(0);
14166
+ auto *Load = cast<MemIntrinsicSDNode>(N);
14167
+ SDValue PassThru = N->getOperand(2);
14168
+ SDValue Base = N->getOperand(3);
14169
+ SDValue Stride = N->getOperand(4);
14170
+ SDValue Mask = N->getOperand(5);
14171
+
14172
+ // If the stride is equal to the element size in bytes, we can use
14173
+ // a masked.load.
14174
+ const unsigned ElementSize = VT.getScalarStoreSize();
14175
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
14176
+ StrideC && StrideC->getZExtValue() == ElementSize)
14177
+ return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
14178
+ DAG.getUNDEF(XLenVT), Mask, PassThru,
14179
+ Load->getMemoryVT(), Load->getMemOperand(),
14180
+ ISD::UNINDEXED, ISD::NON_EXTLOAD);
14181
+ return SDValue();
14182
+ }
14187
14183
case Intrinsic::riscv_vcpop:
14188
14184
case Intrinsic::riscv_vcpop_mask:
14189
14185
case Intrinsic::riscv_vfirst:
0 commit comments