Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_ArmBase_Arm64_MultiplyLongAdd:
ins = varTypeIsUnsigned(intrin.baseType) ? INS_umaddl : INS_smaddl;
break;

case NI_ArmBase_Arm64_MultiplyLongSub:
ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl;
break;

default:
ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType);
break;
Expand Down Expand Up @@ -1112,6 +1120,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
break;
}

case NI_ArmBase_Arm64_MultiplyLongAdd:
case NI_ArmBase_Arm64_MultiplyLongSub:
assert(opt == INS_OPTS_NONE);
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg);
break;

default:
unreached();
}
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,9 @@ HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingSignCount,
HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_clz, INS_clz, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyHigh, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smulh, INS_umulh, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(ArmBase_Arm64, ReverseElementBits, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rbit, INS_rbit, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongAdd, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smaddl, INS_umaddl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongSub, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smsubl, INS_umsubl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongNeg, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smnegl, INS_umnegl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags
Expand Down
15 changes: 14 additions & 1 deletion src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,16 @@ GenTree* Lowering::LowerNode(GenTree* node)

case GT_NEG:
#ifdef TARGET_ARM64
{
GenTree* next = TryLowerNegToMulLongOp(node->AsOp());
if (next != nullptr)
{
return next;
}
ContainCheckNeg(node->AsOp());
}
#endif
break;
break;
case GT_SELECT:
return LowerSelect(node->AsConditional());

Expand Down Expand Up @@ -6316,6 +6323,12 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
{
return next;
}

next = TryLowerAddSubToMulLongOp(node);
if (next != nullptr)
{
return next;
}
}
#endif // TARGET_ARM64

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class Lowering final : public Phase
void ContainCheckNeg(GenTreeOp* neg);
void TryLowerCnsIntCselToCinc(GenTreeOp* select, GenTree* cond);
void TryLowerCselToCSOp(GenTreeOp* select, GenTree* cond);
// Tries to fold an ADD/SUB that has a GT_MUL_LONG operand into a single
// MultiplyLongAdd/MultiplyLongSub hardware intrinsic node.
// Returns the node lowering should continue with, or nullptr if nothing was changed.
GenTree* TryLowerAddSubToMulLongOp(GenTreeOp* op);
// Tries to fold a NEG of a GT_MUL_LONG into a single MultiplyLongNeg hardware intrinsic node.
// Returns the node lowering should continue with, or nullptr if nothing was changed.
GenTree* TryLowerNegToMulLongOp(GenTreeOp* op);
#endif
void ContainCheckSelect(GenTreeOp* select);
void ContainCheckBitCast(GenTree* node);
Expand Down
162 changes: 162 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,16 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
return next;
}
}

if (binOp->OperIs(GT_SUB))
{
// Attempt to optimize for umsubl/smsubl.
GenTree* next = TryLowerAddSubToMulLongOp(binOp);
if (next != nullptr)
{
return next;
}
}
#endif
}

Expand Down Expand Up @@ -2750,6 +2760,158 @@ void Lowering::TryLowerCnsIntCselToCinc(GenTreeOp* select, GenTree* cond)
}
}
}

//----------------------------------------------------------------------------------------------
// TryLowerAddSubToMulLongOp: Attempt to convert ADD and SUB nodes to a combined multiply
//                            and add/sub operation. Conversion can only happen if the operands
//                            to the operation meet the following criteria:
//                            - One op is a MUL_LONG containing two integer operands, and the
//                              other is a long.
//                            - For SUB, the MUL_LONG must be the second operand, because
//                              [u/s]msubl computes: addValue - (mulValue1 * mulValue2).
//
// Arguments:
//     op - The ADD or SUB node to attempt an optimisation on.
//
// Returns:
//     A pointer to the next node to evaluate. On no operation, returns nullptr.
//
// Notes:
//     On success 'op' is rewritten in place into a NI_ArmBase_Arm64_MultiplyLongAdd or
//     NI_ArmBase_Arm64_MultiplyLongSub node whose operands are (mulOp1, mulOp2, addValue),
//     and the now unused MUL_LONG node is removed from the block range.
//
GenTree* Lowering::TryLowerAddSubToMulLongOp(GenTreeOp* op)
{
    assert(op->OperIs(GT_ADD, GT_SUB));

    if (!comp->opts.OptimizationEnabled())
        return nullptr;

    if (!JitConfig.EnableHWIntrinsic())
        return nullptr;

    if (op->isContained())
        return nullptr;

    if (!varTypeIsIntegral(op))
        return nullptr;

    if ((op->gtFlags & GTF_SET_FLAGS) != 0)
        return nullptr;

    if (op->gtOverflow())
        return nullptr;

    GenTree* op1 = op->gtGetOp1();
    GenTree* op2 = op->gtGetOp2();

    // Select which operand is the MUL_LONG and which is the add value.
    GenTreeOp* mul;
    GenTree*   addVal;
    if (op->OperIs(GT_ADD) && op1->OperIs(GT_MUL_LONG))
    {
        // Addition is commutative, so the multiply may be the first operand.
        mul    = op1->AsOp();
        addVal = op2;
    }
    else if (op2->OperIs(GT_MUL_LONG))
    {
        // Valid for both ADD and SUB. For SUB the multiply has to be the subtrahend; this also
        // covers "MUL_LONG - MUL_LONG", where the first multiply simply acts as the long minuend.
        mul    = op2->AsOp();
        addVal = op1;
    }
    else
    {
        // Exit if no operand in a usable position is a GT_MUL_LONG.
        return nullptr;
    }

    // Additional value must be of long size.
    if (!addVal->TypeIs(TYP_LONG))
        return nullptr;

    // Mul values must both be integers.
    if (!genActualTypeIsInt(mul->gtOp1) || !genActualTypeIsInt(mul->gtOp2))
        return nullptr;

    // Create the new node and replace the original.
    const NamedIntrinsic intrinsicId =
        op->OperIs(GT_ADD) ? NI_ArmBase_Arm64_MultiplyLongAdd : NI_ArmBase_Arm64_MultiplyLongSub;
    GenTreeHWIntrinsic* outOp =
        comp->gtNewScalarHWIntrinsicNode(TYP_LONG, mul->gtOp1, mul->gtOp2, addVal, intrinsicId);

    // The base type carries the signedness of the multiply; codegen uses it to pick the
    // unsigned (umaddl/umsubl) or signed (smaddl/smsubl) instruction.
    outOp->SetSimdBaseJitType(mul->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG);
    op->ReplaceWith(outOp, comp);

    // Delete the hanging MUL. Its operands now belong to the intrinsic node, so detach them first.
    mul->gtOp1 = nullptr;
    mul->gtOp2 = nullptr;
    BlockRange().Remove(mul);

#ifdef DEBUG
    JITDUMP("Converted to HW_INTRINSIC 'NI_ArmBase_Arm64_MultiplyLong[Add/Sub]'.\n");
    if (comp->verbose)
        comp->gtDispNodeName(op);
    JITDUMP(":\n");
    DISPTREERANGE(BlockRange(), op);
    JITDUMP("\n");
#endif

    return op;
}

//----------------------------------------------------------------------------------------------
// TryLowerNegToMulLongOp: Attempt to convert NEG nodes to a combined multiply
//                         and negate operation. Conversion can only happen if the operand to
//                         the operation meets the following criteria:
//                         - op1 is a MUL_LONG containing two integer operands.
//
// Arguments:
//     op - The NEG node to attempt an optimisation on.
//
// Returns:
//     A pointer to the next node to evaluate. On no operation, returns nullptr.
//
// Notes:
//     On success 'op' is rewritten in place into a NI_ArmBase_Arm64_MultiplyLongNeg node whose
//     operands are the two operands of the MUL_LONG, and the now unused MUL_LONG node is
//     removed from the block range.
//
GenTree* Lowering::TryLowerNegToMulLongOp(GenTreeOp* op)
{
    assert(op->OperIs(GT_NEG));

    if (!comp->opts.OptimizationEnabled())
        return nullptr;

    // Do not introduce hardware intrinsic nodes when they are disabled; this matches the
    // guard used by TryLowerAddSubToMulLongOp.
    if (!JitConfig.EnableHWIntrinsic())
        return nullptr;

    if (op->isContained())
        return nullptr;

    if (!varTypeIsIntegral(op))
        return nullptr;

    if ((op->gtFlags & GTF_SET_FLAGS) != 0)
        return nullptr;

    GenTree* op1 = op->gtGetOp1();

    // Ensure the negated operand is a MUL_LONG.
    if (!op1->OperIs(GT_MUL_LONG))
        return nullptr;

    // Ensure the MUL_LONG contains two integer parameters.
    GenTreeOp* mul = op1->AsOp();
    if (!genActualTypeIsInt(mul->gtOp1) || !genActualTypeIsInt(mul->gtOp2))
        return nullptr;

    // Able to optimise, create the new node and replace the original.
    GenTreeHWIntrinsic* outOp =
        comp->gtNewScalarHWIntrinsicNode(TYP_LONG, mul->gtOp1, mul->gtOp2, NI_ArmBase_Arm64_MultiplyLongNeg);

    // Record the signedness of the multiply as the base type. The instruction for
    // MultiplyLongNeg is looked up by base type (smnegl for long, umnegl for ulong), so without
    // this an unsigned MUL_LONG could not be told apart from a signed one.
    outOp->SetSimdBaseJitType(mul->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG);
    op->ReplaceWith(outOp, comp);

    // Clean up hanging mul. Its operands now belong to the intrinsic node, so detach them first.
    mul->gtOp1 = nullptr;
    mul->gtOp2 = nullptr;
    BlockRange().Remove(mul);

#ifdef DEBUG
    JITDUMP("Converted to HW_INTRINSIC 'NI_ArmBase_Arm64_MultiplyLongNeg'.\n");
    if (comp->verbose)
        comp->gtDispNodeName(op);
    JITDUMP(":\n");
    DISPTREERANGE(BlockRange(), op);
    JITDUMP("\n");
#endif

    return op;
}
#endif // TARGET_ARM64

//------------------------------------------------------------------------
Expand Down
Loading