-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering #159355
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Change-Id: I4616238ffc29161971cdae5010ade99ac916c82e
@llvm/pr-subscribers-backend-hexagon Author: None (pkarveti) Changes: This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das. Full diff: https://github.com/llvm/llvm-project/pull/159355.diff 5 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c54b67ccd8843..9f7f434b66fa1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3352,7 +3352,6 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
-
// Handle INLINEASM first.
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 9ebbbc6399b42..8d04edbea5b43 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -577,6 +577,8 @@ class HexagonTargetLowering : public TargetLowering {
SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxPred32ToFp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index ff02a67d54363..d0dfa47468705 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -446,6 +446,10 @@ HexagonTargetLowering::initializeHVXLowering() {
}
}
+ // Include cases which are not handled earlier
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+
setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
}
@@ -2333,6 +2337,123 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
return ExpandHvxFpToInt(Op, DAG);
}
+// Lowers uint_to_fp from v32i1 to v32f32. PredOp is the conversion node; its operand 0 is the predicate:
+// R1 = #1, R2 holds the v32i1 param (bitcast to i32)
+// V1 = vsplat(R1)
+// V2 = vsplat(R2)
+// Q0 = vand(V1,R1)
+// V0.w = prefixsum(Q0)
+// V0.w = vsub(V0.w,V1.w) (presumably leaves each word lane's index; TODO confirm)
+// V2.w = vlsr(V2.w,V0.w) (shift the splatted mask right by the per-lane amount)
+// V2 = vand(V2,V1) (keep only bit 0 of every word)
+// V2.sf = V2.w (convert the resulting words to ResTy)
+SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
+ SelectionDAG &DAG) const {
+
+ MVT ResTy = ty(PredOp);
+ const SDLoc &dl(PredOp);
+
+ SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+ SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+ SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ SDValue(RegConst, 0));
+ SDNode *PredTransfer =
+ DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+ SDValue(SplatConst, 0), SDValue(RegConst, 0));
+ SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+ SDValue(PredTransfer, 0));
+ SDNode *SplatParam = DAG.getMachineNode(
+ Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
+ SDNode *Vsub =
+ DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+ SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+ SDNode *IndexShift =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatParam, 0), SDValue(Vsub, 0));
+ SDNode *MaskOff =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift, 0), SDValue(SplatConst, 0));
+ SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
+ SDValue(MaskOff, 0));
+ return SDValue(Convert, 0);
+}
+
+// Lowers uint_to_fp from v64i1 to v64f16. PredOp is the conversion node; its operand 0 is the predicate:
+// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
+// R3 = subreg_high (R32)
+// R2 = subreg_low (R32)
+// R1 = #1
+// V1 = vsplat(R1)
+// V2 = vsplat(R2)
+// V3 = vsplat(R3)
+// Q0 = vand(V1,R1)
+// V0.w = prefixsum(Q0)
+// V0.w = vsub(V0.w,V1.w)
+// V2.w = vlsr(V2.w,V0.w)
+// V3.w = vlsr(V3.w,V0.w)
+// V2 = vand(V2,V1)
+// V3 = vand(V3,V1)
+// V2.h = vpacke(V3.w,V2.w)
+// V2.hf = V2.h
+SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
+ SelectionDAG &DAG) const {
+
+ MVT ResTy = ty(PredOp);
+ const SDLoc &dl(PredOp);
+
+ SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
+ // Split the i64 bitcast of the predicate into its hi and lo i32 subregisters
+ SDValue HiReg =
+ DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
+ SDValue LoReg =
+ DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
+ // Materialize constant #1 in a register and splat it into a v32i32 vector (V1)
+ SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+ SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+ SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ SDValue(RegConst, 0));
+ // Splat the hi and lo halves into separate v32i32 vectors
+ SDNode *SplatHi =
+ DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
+ SDNode *SplatLo =
+ DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
+ // vand of the splatted #1 with the #1 register, producing a v32i1 predicate
+ SDNode *PredTransfer =
+ DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+ SDValue(SplatConst, 0), SDValue(RegConst, 0));
+ // Prefix sum over that predicate, produced as a v32i32 vector
+ SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+ SDValue(PredTransfer, 0));
+ // Subtract the splatted #1 from the prefix sum (presumably per-lane shift amounts; TODO confirm)
+ SDNode *Vsub =
+ DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+ SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+ // vlsr the hi and lo splats by the per-word amounts held in Vsub
+ SDNode *IndexShift_hi =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatHi, 0), SDValue(Vsub, 0));
+ SDNode *IndexShift_lo =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatLo, 0), SDValue(Vsub, 0));
+ // Mask each shifted vector with the splatted #1
+ SDNode *MaskOff_hi =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
+ SDNode *MaskOff_lo =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
+ // Pack the hi and lo word vectors into one v64i16 vector, then convert it to ResTy
+ SDNode *Pack =
+ DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
+ SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
+ SDNode *Convert =
+ DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
+ return SDValue(Convert, 0);
+}
+
SDValue
HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
// Catch invalid conversion ops (just in case).
@@ -2343,6 +2464,13 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
MVT FpTy = ResTy.getVectorElementType();
+ if (Op.getOpcode() == ISD::UINT_TO_FP) {
+ if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
+ return LowerHvxPred32ToFp(Op, DAG);
+ if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
+ return LowerHvxPred64ToFp(Op, DAG);
+ }
+
if (Subtarget.useHVXIEEEFPOps()) {
// There are only conversions to f16.
if (FpTy == MVT::f16) {
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
new file mode 100644
index 0000000000000..dfb2bc83537dc
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
@@ -0,0 +1,25 @@
+; Tests lowering of v32i1 to v32f32
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b,+hvx-ieee-fp \
+; RUN: -stop-after=hexagon-isel %s -o - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:hvxvr = V6_lvsplatw killed %{{[0-9]+}}
+; CHECK-NEXT: [[R1:%[0-9]+]]:intregs = A2_tfrsi 1
+; CHECK-NEXT: [[R2:%[0-9]+]]:hvxvr = V6_lvsplatw [[R1]]
+; CHECK-NEXT: [[R3:%[0-9]+]]:hvxqr = V6_vandvrt [[R2]], [[R1]]
+; CHECK-NEXT: [[R4:%[0-9]+]]:hvxvr = V6_vprefixqw killed [[R3]]
+; CHECK-NEXT: [[R5:%[0-9]+]]:hvxvr = V6_vsubw killed [[R4]], [[R2]]
+; CHECK-NEXT: [[R6:%[0-9]+]]:hvxvr = V6_vlsrwv killed [[R0]], killed [[R5]]
+; CHECK-NEXT: [[R7:%[0-9]+]]:hvxvr = V6_vand killed [[R6]], [[R2]]
+; CHECK-NEXT: [[R8:%[0-9]+]]:hvxvr = V6_vconv_sf_w killed [[R7]]
+; CHECK-NEXT: hvxvr = V6_vadd_sf_sf [[R8]], [[R8]]
+
+define <32 x float> @uitofp_i1(<32 x i16> %in0, <32 x i16> %in1) #0
+{
+ %q1 = icmp eq <32 x i16> %in0, %in1
+ %fp0 = uitofp <32 x i1> %q1 to <32 x float>
+ %out = fadd <32 x float> %fp0, %fp0
+ ret <32 x float> %out
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv79" "target-features"="+hvxv79,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
new file mode 100644
index 0000000000000..8769e345655e9
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
@@ -0,0 +1,27 @@
+; Tests the conversion pattern for v64i1 to v64f16
+; r0, r3 and r9 registers are i32 types converted from
+; v32i1 via a bitcasting sequence.
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b \
+; RUN: %s -verify-machineinstrs -o - | FileCheck %s
+
+; CHECK: [[V3:v[0-9]+]] = vsplat([[R0:r[0-9]+]])
+; CHECK: [[Q0:q[0-9]+]] = vand([[V3]],[[R0]])
+; CHECK: [[V4:v[0-9]+]].w = prefixsum([[Q0]])
+; CHECK: [[V5:v[0-9]+]].w = vsub([[V4]].w,[[V3]].w)
+; CHECK: [[V1:v[0-9]+]] = vsplat(r
+; CHECK: [[V2:v[0-9]+]] = vsplat(r
+; CHECK: [[V6:v[0-9]+]].w = vlsr([[V1]].w,[[V5]].w)
+; CHECK: [[V7:v[0-9]+]].w = vlsr([[V2]].w,[[V5]].w)
+; CHECK: [[V8:v[0-9]+]] = vand([[V6]],[[V3]])
+; CHECK: [[V9:v[0-9]+]] = vand([[V7]],[[V3]])
+; CHECK: [[V10:v[0-9]+]].h = vpacke([[V9]].w,[[V8]].w)
+; CHECK: .hf = [[V10]].h
+
+define <64 x half> @uitofp_i1(<64 x i16> %in0, <64 x i16> %in1)
+{
+ %in = icmp eq <64 x i16> %in0, %in1
+ %fp0 = uitofp <64 x i1> %in to <64 x half>
+ %out = fadd <64 x half> %fp0, %fp0
+ ret <64 x half> %out
+}
|
@iajbar could you please review this patch |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Co-authored-by: quic-santdas <[email protected]>
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Co-authored-by: quic-santdas <[email protected]>
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Co-authored-by: quic-santdas <[email protected]>
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors.
Patch-by: Santanu Das