@@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8750
8750
return SDValue ();
8751
8751
8752
8752
int NumVecElts = VTy.getVectorNumElements ();
8753
- if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16 )
8754
- return SDValue ();
8753
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
8754
+ if (NumVecElts != 4 )
8755
+ return SDValue ();
8756
+ } else {
8757
+ if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16 )
8758
+ return SDValue ();
8759
+ }
8755
8760
8756
8761
int NumExpectedSteps = APInt (8 , NumVecElts).logBase2 ();
8757
8762
SDValue PreOp = OpV;
@@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8802
8807
PreOp = CurOp;
8803
8808
}
8804
8809
unsigned Opcode;
8810
+ bool IsIntrinsic = false ;
8811
+
8805
8812
switch (Op) {
8806
8813
default :
8807
8814
llvm_unreachable (" Unexpected operator for across vector reduction" );
@@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8820
8827
case ISD::UMIN:
8821
8828
Opcode = AArch64ISD::UMINV;
8822
8829
break ;
8830
+ case ISD::FMAXNUM:
8831
+ Opcode = Intrinsic::aarch64_neon_fmaxnmv;
8832
+ IsIntrinsic = true ;
8833
+ break ;
8834
+ case ISD::FMINNUM:
8835
+ Opcode = Intrinsic::aarch64_neon_fminnmv;
8836
+ IsIntrinsic = true ;
8837
+ break ;
8823
8838
}
8824
8839
SDLoc DL (N);
8825
- return DAG.getNode (ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType (0 ),
8826
- DAG.getNode (Opcode, DL, PreOp.getSimpleValueType (), PreOp),
8827
- DAG.getConstant (0 , DL, MVT::i64));
8840
+
8841
+ return IsIntrinsic
8842
+ ? DAG.getNode (ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType (0 ),
8843
+ DAG.getConstant (Opcode, DL, MVT::i32), PreOp)
8844
+ : DAG.getNode (
8845
+ ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType (0 ),
8846
+ DAG.getNode (Opcode, DL, PreOp.getSimpleValueType (), PreOp),
8847
+ DAG.getConstant (0 , DL, MVT::i64));
8828
8848
}
8829
8849
8830
8850
/// Target-specific DAG combine for the across vector min/max reductions.
@@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8848
8868
/// becomes :
8849
8869
/// %1 = smaxv %0
8850
8870
/// %result = extract_vector_elt %1, 0
8851
- /// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
8852
- /// We could also support other types of across lane reduction available
8853
- /// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
8854
8871
static SDValue
8855
8872
performAcrossLaneMinMaxReductionCombine (SDNode *N, SelectionDAG &DAG,
8856
8873
const AArch64Subtarget *Subtarget) {
@@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
8878
8895
SDValue VectorOp = SetCC.getOperand (0 );
8879
8896
unsigned Op = VectorOp->getOpcode ();
8880
8897
// Check if the input vector is fed by the operator we want to handle.
8881
- if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
8898
+ if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
8899
+ Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
8882
8900
return SDValue ();
8883
8901
8884
8902
EVT VTy = VectorOp.getValueType ();
8885
8903
if (!VTy.isVector ())
8886
8904
return SDValue ();
8887
8905
8888
- EVT EltTy = VTy.getVectorElementType ();
8889
- if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8906
+ if (VTy.getSizeInBits () < 64 )
8890
8907
return SDValue ();
8891
8908
8909
+ EVT EltTy = VTy.getVectorElementType ();
8910
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
8911
+ if (EltTy != MVT::f32)
8912
+ return SDValue ();
8913
+ } else {
8914
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8915
+ return SDValue ();
8916
+ }
8917
+
8892
8918
// Check if extracting from the same vector.
8893
8919
// For example,
8894
8920
// %sc = setcc %vector, %svn1, gt
@@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
8904
8930
if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
8905
8931
(Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
8906
8932
(Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
8907
- (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
8933
+ (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
8934
+ (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
8935
+ CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
8936
+ CC != ISD::SETGE) ||
8937
+ (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
8938
+ CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
8939
+ CC != ISD::SETLE))
8908
8940
return SDValue ();
8909
8941
8910
8942
// Expect to check only lane 0 from the vector SETCC.
@@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
8963
8995
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8964
8996
return SDValue ();
8965
8997
8998
+ if (VTy.getSizeInBits () < 64 )
8999
+ return SDValue ();
9000
+
8966
9001
return tryMatchAcrossLaneShuffleForReduction (N, N0, ISD::ADD, DAG);
8967
9002
}
8968
9003
0 commit comments