From 1191d6d3daef8d2d04f3c52e51996b318f6831fe Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 5 Mar 2022 20:14:21 +0300 Subject: [PATCH 1/6] Optimize movzx after setcc --- src/coreclr/jit/codegenxarch.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e2dad3d7cc6ed6..4cd2d6835ae68b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6438,6 +6438,8 @@ void CodeGen::genCompareInt(GenTree* treeNode) // Sign jump optimization should only be set the following check assert((tree->gtFlags & GTF_RELOP_SJUMP_OPT) == 0); + var_types targetType = tree->TypeGet(); + if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), tree->OperGet())) { JITDUMP("Not emitting compare due to flags being already set\n"); @@ -6449,13 +6451,20 @@ void CodeGen::genCompareInt(GenTree* treeNode) } else { + // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC + if ((targetReg != REG_NA) && !varTypeIsByte(targetType) && (op1->GetReg() != targetReg) && + (op2->GetReg() != targetReg)) + { + instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); + targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed + } emit->emitInsBinary(ins, emitTypeSize(type), op1, op2); } // Are we evaluating this into a register? if (targetReg != REG_NA) { - inst_SETCC(GenCondition::FromIntegralRelop(tree), tree->TypeGet(), targetReg); + inst_SETCC(GenCondition::FromIntegralRelop(tree), targetType, targetReg); genProduceReg(tree); } } From dde2c1edb3a5897009a15c6650bcdf27a2195132 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 5 Mar 2022 20:39:18 +0300 Subject: [PATCH 2/6] fix build error --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 4cd2d6835ae68b..63348874f1722a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6452,8 +6452,8 @@ void CodeGen::genCompareInt(GenTree* treeNode) else { // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC - if ((targetReg != REG_NA) && !varTypeIsByte(targetType) && (op1->GetReg() != targetReg) && - (op2->GetReg() != targetReg)) + if ((targetReg != REG_NA) && !varTypeIsByte(targetType) && (op1->GetRegNum() != targetReg) && + (op2->GetRegNum() != targetReg)) { instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed From 9110760e93d8cc598ac8532b6d9bb182f441425b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 6 Mar 2022 00:40:13 +0300 Subject: [PATCH 3/6] Let's try this --- src/coreclr/jit/codegenxarch.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 63348874f1722a..41189b6dc3d888 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6452,11 +6452,16 @@ void CodeGen::genCompareInt(GenTree* treeNode) else { // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC - if ((targetReg != REG_NA) && !varTypeIsByte(targetType) && (op1->GetRegNum() != targetReg) && - (op2->GetRegNum() != targetReg)) + if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) && + !varTypeIsByte(targetType)) { - instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); - targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed + // memory loads might still use target reg here so we can't clear it + if ((op1->isUsedFromReg() || op1->isContainedIntOrIImmed()) && + (op2->isUsedFromReg() || op2->isContainedIntOrIImmed())) + { + instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); + targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed + } } emit->emitInsBinary(ins, emitTypeSize(type), op1, op2); } From 3977f17f7e02e593c2c24f8ac3a93199ea15a47e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 6 Mar 2022 12:05:05 +0300 Subject: [PATCH 4/6] add floats --- src/coreclr/jit/codegenxarch.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 41189b6dc3d888..b623e670f76f61 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6294,6 +6294,21 @@ void CodeGen::genCompareFloat(GenTree* treeNode) ins = (op1Type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd; cmpAttr = emitTypeSize(op1Type); + var_types targetType = treeNode->TypeGet(); + + // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC + if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) && + !varTypeIsByte(targetType)) + { + // memory loads might still use target reg here so we can't clear it + if ((op1->isUsedFromReg() || op1->isContainedFltOrDblImmed()) && + (op2->isUsedFromReg() || op2->isContainedFltOrDblImmed())) + { + instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); + targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed + } + } + GetEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); // Are we evaluating this into a register? @@ -6309,7 +6324,7 @@ void CodeGen::genCompareFloat(GenTree* treeNode) condition = GenCondition(GenCondition::P); } - inst_SETCC(condition, treeNode->TypeGet(), targetReg); + inst_SETCC(condition, targetType, targetReg); genProduceReg(tree); } } From 22f3d20570fccc2766463ccf08aaa5134e9041e7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 7 Mar 2022 17:22:32 +0300 Subject: [PATCH 5/6] Use Jakob's approach --- src/coreclr/jit/codegenxarch.cpp | 11 ++++------- src/coreclr/jit/gentree.cpp | 20 ++++++++++++++++++++ src/coreclr/jit/gentree.h | 1 + 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index b623e670f76f61..ce948c084a4e85 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6300,15 +6300,13 @@ void CodeGen::genCompareFloat(GenTree* treeNode) if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) && !varTypeIsByte(targetType)) { - // memory loads might still use target reg here so we can't clear it - if ((op1->isUsedFromReg() || op1->isContainedFltOrDblImmed()) && - (op2->isUsedFromReg() || op2->isContainedFltOrDblImmed())) + regMaskTP targetRegMask = genRegMask(targetReg); + if (((op1->gtGetContainedRegMask() | op2->gtGetContainedRegMask()) & targetRegMask) == 0) { instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed } } - GetEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); // Are we evaluating this into a register? @@ -6470,9 +6468,8 @@ void CodeGen::genCompareInt(GenTree* treeNode) if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) && !varTypeIsByte(targetType)) { - // memory loads might still use target reg here so we can't clear it - if ((op1->isUsedFromReg() || op1->isContainedIntOrIImmed()) && - (op2->isUsedFromReg() || op2->isContainedIntOrIImmed())) + regMaskTP targetRegMask = genRegMask(targetReg); + if (((op1->gtGetContainedRegMask() | op2->gtGetContainedRegMask()) & targetRegMask) == 0) { instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg); targetType = TYP_BOOL; // just a tip for inst_SETCC that movzx is not needed diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8350e7ad725fb2..371296f281f1c9 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -862,6 +862,26 @@ unsigned GenTree::GetMultiRegCount(Compiler* comp) const return 1; } +//--------------------------------------------------------------- +// gtGetContainedRegMask: Get the reg mask of the node including +// contained nodes (recursive). +// +// Arguments: +// None +// +// Return Value: +// Reg Mask of GenTree node. +// +regMaskTP GenTree::gtGetContainedRegMask() +{ + regMaskTP mask = GetRegNum() != REG_NA ? gtGetRegMask() : 0; + for (GenTree* operand : Operands()) + { + mask |= operand->gtGetContainedRegMask(); + } + return mask; +} + //--------------------------------------------------------------- // gtGetRegMask: Get the reg mask of the node. // diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index d86d891b303271..af58071a37d20d 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -990,6 +990,7 @@ struct GenTree int GetRegisterDstCount(Compiler* compiler) const; regMaskTP gtGetRegMask() const; + regMaskTP gtGetContainedRegMask(); GenTreeFlags gtFlags; From cd8f1a9ee1d0640c5a9a0bbc335f592fb7012bf2 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Mon, 7 Mar 2022 19:20:57 +0300 Subject: [PATCH 6/6] Update gentree.cpp --- src/coreclr/jit/gentree.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 371296f281f1c9..640ee36accebc5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -874,7 +874,12 @@ unsigned GenTree::GetMultiRegCount(Compiler* comp) const // regMaskTP GenTree::gtGetContainedRegMask() { - regMaskTP mask = GetRegNum() != REG_NA ? gtGetRegMask() : 0; + if (!isContained()) + { + return gtGetRegMask(); + } + + regMaskTP mask = 0; for (GenTree* operand : Operands()) { mask |= operand->gtGetContainedRegMask();