From deb43c21e6f21f2653747a459cd9d350503d2900 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 15 Mar 2023 10:00:47 +0000 Subject: [PATCH 1/4] C++: Use local flow instead of GVN to find parameters that are used in switch statements. --- .../ir/dataflow/internal/DataFlowPrivate.qll | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 5bda27a5ea0f..266be71836b5 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -897,6 +897,55 @@ IRBlock getBasicBlock(Node node) { result = getBasicBlock(node.(PostUpdateNode).getPreUpdateNode()) } +/** + * A local flow relation that includes both local steps, read steps and + * argument-to-return flow through summarized functions. + */ +private predicate localFlowStepWithSummaries(Node node1, Node node2) { + localFlowStep(node1, node2) + or + readStep(node1, _, node2) + or + argumentValueFlowsThrough(node1, _, node2) +} + +/** Holds if `node` flows to a node that is used in a `SwitchInstruction`. */ +private predicate localStepsToSwitch(Node node) { + node.asOperand() = any(SwitchInstruction switch).getExpressionOperand() + or + exists(Node succ | + localStepsToSwitch(succ) and + localFlowStepWithSummaries(node, succ) + ) +} + +/** + * Holds if `node` is part of a path from a `ParameterNode` to an operand + * of a `SwitchInstruction`. + */ +private predicate localStepsFromParameter(Node node) { + localStepsToSwitch(node) and + ( + node instanceof ParameterNode + or + exists(Node prev | + localStepsFromParameter(prev) and + localFlowStepWithSummaries(prev, node) + ) + ) +} + +/** + * The local flow relation `localFlowStepWithSummaries` pruned to only + * include steps that are part of a path from a `ParameterNode` to an + * operand of a `SwitchInstruction`. + */ +private predicate getAdditionalFlowIntoCallNodeTermStep(Node node1, Node node2) { + localStepsFromParameter(node1) and + localStepsFromParameter(node2) and + localFlowStepWithSummaries(node1, node2) +} + /** * Gets an additional term that is added to the `join` and `branch` computations to reflect * an additional forward or backwards branching factor that is not taken into account @@ -910,7 +959,7 @@ int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { viableParamArg(call, p, arg) and viableParamArg(call, switchee, _) and switch.getExpressionOperand() = op and - valueNumber(switchee.asInstruction()).getAUse() = op and + getAdditionalFlowIntoCallNodeTermStep+(switchee, operandNode(op)) and result = countNumberOfBranchesUsingParameter(switch, p) ) } From 623f6ff701cb0d1faf386db47c64b4d8f036badb Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 15 Mar 2023 10:01:47 +0000 Subject: [PATCH 2/4] C++: Move things around so that 'getAdditionalFlowIntoCallNodeTerm' is in the same stage as 'DataFlowImplCommon'. --- .../ir/dataflow/internal/DataFlowPrivate.qll | 92 ++++++++++--------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 266be71836b5..cb779a0d7db1 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -5,33 +5,59 @@ private import DataFlowDispatch private import DataFlowImplConsistency private import semmle.code.cpp.ir.internal.IRCppLanguage private import SsaInternals as Ssa -private import DataFlowImplCommon +private import DataFlowImplCommon as DataFlowImplCommon private import semmle.code.cpp.ir.ValueNumbering cached private module Cached { cached - newtype TIRDataFlowNode0 = - TInstructionNode0(Instruction i) { - not Ssa::ignoreInstruction(i) and - not exists(Operand op | - not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op) - ) and - // We exclude `void`-typed instructions because they cannot contain data. - // However, if the instruction is a glvalue, and their type is `void`, then the result - // type of the instruction is really `void*`, and thus we still want to have a dataflow - // node for it. - (not i.getResultType() instanceof VoidType or i.isGLValue()) - } or - TMultipleUseOperandNode0(Operand op) { - not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op)) - } or - TSingleUseOperandNode0(Operand op) { - not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op)) - } -} - -private import Cached + module Nodes0 { + cached + newtype TIRDataFlowNode0 = + TInstructionNode0(Instruction i) { + not Ssa::ignoreInstruction(i) and + not exists(Operand op | + not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op) + ) and + // We exclude `void`-typed instructions because they cannot contain data. + // However, if the instruction is a glvalue, and their type is `void`, then the result + // type of the instruction is really `void*`, and thus we still want to have a dataflow + // node for it. + (not i.getResultType() instanceof VoidType or i.isGLValue()) + } or + TMultipleUseOperandNode0(Operand op) { + not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op)) + } or + TSingleUseOperandNode0(Operand op) { + not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op)) + } + } + + /** + * Gets an additional term that is added to the `join` and `branch` computations to reflect + * an additional forward or backwards branching factor that is not taken into account + * when calculating the (virtual) dispatch cost. + * + * Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter. + */ + pragma[nomagic] + cached + int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { + DataFlowImplCommon::forceCachingInSameStage() and + exists( + ParameterNode switchee, SwitchInstruction switch, ConditionOperand op, DataFlowCall call + | + DataFlowImplCommon::viableParamArg(call, p, arg) and + DataFlowImplCommon::viableParamArg(call, switchee, _) and + switch.getExpressionOperand() = op and + getAdditionalFlowIntoCallNodeTermStep+(switchee, operandNode(op)) and + result = countNumberOfBranchesUsingParameter(switch, p) + ) + } +} + +import Cached +private import Nodes0 class Node0Impl extends TIRDataFlowNode0 { /** @@ -906,7 +932,7 @@ private predicate localFlowStepWithSummaries(Node node1, Node node2) { or readStep(node1, _, node2) or - argumentValueFlowsThrough(node1, _, node2) + DataFlowImplCommon::argumentValueFlowsThrough(node1, _, node2) } /** Holds if `node` flows to a node that is used in a `SwitchInstruction`. */ @@ -946,24 +972,6 @@ private predicate getAdditionalFlowIntoCallNodeTermStep(Node node1, Node node2) localFlowStepWithSummaries(node1, node2) } -/** - * Gets an additional term that is added to the `join` and `branch` computations to reflect - * an additional forward or backwards branching factor that is not taken into account - * when calculating the (virtual) dispatch cost. - * - * Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter. - */ -pragma[nomagic] -int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { - exists(ParameterNode switchee, SwitchInstruction switch, ConditionOperand op, DataFlowCall call | - viableParamArg(call, p, arg) and - viableParamArg(call, switchee, _) and - switch.getExpressionOperand() = op and - getAdditionalFlowIntoCallNodeTermStep+(switchee, operandNode(op)) and - result = countNumberOfBranchesUsingParameter(switch, p) - ) -} - /** Gets the `IRVariable` associated with the parameter node `p`. */ pragma[nomagic] private IRVariable getIRVariableForParameterNode(ParameterNode p) { @@ -992,7 +1000,7 @@ private EdgeKind caseOrDefaultEdge() { /** * Gets the number of switch branches that that read from (or write to) the parameter `p`. */ -int countNumberOfBranchesUsingParameter(SwitchInstruction switch, ParameterNode p) { +private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, ParameterNode p) { exists(Ssa::SourceVariable sv | parameterNodeHasSourceVariable(p, sv) and // Count the number of cases that use the parameter. We do this by finding the phi node From 9bd3347a3c48f94f4ba90e8c2c08f094d4d2cad3 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 15 Mar 2023 10:33:47 +0000 Subject: [PATCH 3/4] C++: Remove import. --- .../lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index cb779a0d7db1..b89536893d83 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -6,7 +6,6 @@ private import DataFlowImplConsistency private import semmle.code.cpp.ir.internal.IRCppLanguage private import SsaInternals as Ssa private import DataFlowImplCommon as DataFlowImplCommon -private import semmle.code.cpp.ir.ValueNumbering cached private module Cached { From 08419b77af99dccb1d8760cbab1fa96a9d936de4 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 15 Mar 2023 14:07:04 +0000 Subject: [PATCH 4/4] C++: Respond to PR reviews. --- .../code/cpp/ir/dataflow/internal/DataFlowPrivate.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index b89536893d83..9d69856e20b5 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -948,13 +948,13 @@ private predicate localStepsToSwitch(Node node) { * Holds if `node` is part of a path from a `ParameterNode` to an operand * of a `SwitchInstruction`. */ -private predicate localStepsFromParameter(Node node) { +private predicate localStepsFromParameterToSwitch(Node node) { localStepsToSwitch(node) and ( node instanceof ParameterNode or exists(Node prev | - localStepsFromParameter(prev) and + localStepsFromParameterToSwitch(prev) and localFlowStepWithSummaries(prev, node) ) ) @@ -966,8 +966,8 @@ private predicate localStepsFromParameter(Node node) { * operand of a `SwitchInstruction`. */ private predicate getAdditionalFlowIntoCallNodeTermStep(Node node1, Node node2) { - localStepsFromParameter(node1) and - localStepsFromParameter(node2) and + localStepsFromParameterToSwitch(node1) and + localStepsFromParameterToSwitch(node2) and localFlowStepWithSummaries(node1, node2) }