Skip to content

Commit 76bb328

Browse files
committed
python: instantiate module for variable capture
This provides variable capture in standard situations: - nested functions - lambdas There are some deficiencies: - we do not yet handle objects capturing variables. - we do not handle variables captured via the `nonlocal` keyword. This should be solved at the AST level, though, and then it should "just work". There is still inconsistencies the case of where a `SynthesizedCaptureNode` has a comprehensions as its enclosing callable. In this case, `TFunction(cn.getEnclosingCallable())` is not defined and so getEnclosingCallable does not exist for the `CaptureNode`.
1 parent c67152c commit 76bb328

File tree

9 files changed

+347
-16
lines changed

9 files changed

+347
-16
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstru
4343
newtype TParameterPosition =
4444
/** Used for `self` in methods, and `cls` in classmethods. */
4545
TSelfParameterPosition() or
46+
TLambdaSelfParameterPosition() or
4647
TPositionalParameterPosition(int index) {
4748
index = any(Parameter p).getPosition()
4849
or
@@ -79,6 +80,9 @@ class ParameterPosition extends TParameterPosition {
7980
/** Holds if this position represents a `self`/`cls` parameter. */
8081
predicate isSelf() { this = TSelfParameterPosition() }
8182

83+
/** Holds if this position represents a reference to a lambda itself. Only used for tracking flow through captured variables. */
84+
predicate isLambdaSelf() { this = TLambdaSelfParameterPosition() }
85+
8286
/** Holds if this position represents a positional parameter at (0-based) `index`. */
8387
predicate isPositional(int index) { this = TPositionalParameterPosition(index) }
8488

@@ -110,6 +114,8 @@ class ParameterPosition extends TParameterPosition {
110114
string toString() {
111115
this.isSelf() and result = "self"
112116
or
117+
this.isLambdaSelf() and result = "lambda self"
118+
or
113119
exists(int index | this.isPositional(index) and result = "position " + index)
114120
or
115121
exists(string name | this.isKeyword(name) and result = "keyword " + name)
@@ -130,6 +136,7 @@ class ParameterPosition extends TParameterPosition {
130136
newtype TArgumentPosition =
131137
/** Used for `self` in methods, and `cls` in classmethods. */
132138
TSelfArgumentPosition() or
139+
TLambdaSelfArgumentPosition() or
133140
TPositionalArgumentPosition(int index) {
134141
exists(any(CallNode c).getArg(index))
135142
or
@@ -154,6 +161,9 @@ class ArgumentPosition extends TArgumentPosition {
154161
/** Holds if this position represents a `self`/`cls` argument. */
155162
predicate isSelf() { this = TSelfArgumentPosition() }
156163

164+
/** Holds if this position represents a lambda `self` argument. Only used for tracking flow through captured variables. */
165+
predicate isLambdaSelf() { this = TLambdaSelfArgumentPosition() }
166+
157167
/** Holds if this position represents a positional argument at (0-based) `index`. */
158168
predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }
159169

@@ -184,6 +194,8 @@ class ArgumentPosition extends TArgumentPosition {
184194
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
185195
ppos.isSelf() and apos.isSelf()
186196
or
197+
ppos.isLambdaSelf() and apos.isLambdaSelf()
198+
or
187199
exists(int index | ppos.isPositional(index) and apos.isPositional(index))
188200
or
189201
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
@@ -1507,6 +1519,36 @@ abstract class ParameterNodeImpl extends Node {
15071519
}
15081520
}
15091521

1522+
/**
1523+
* The value of a lambda itself at function entry, viewed as a node in a data
1524+
* flow graph.
1525+
*
1526+
* This is used for tracking flow through captured variables, and we use a
1527+
* separate node and parameter/argument positions in order to distinguish
1528+
* "lambda self" from "normal self", as lambdas may also access outer `self`
1529+
* variables (through variable capture).
1530+
*/
1531+
class LambdaSelfReferenceNode extends ParameterNodeImpl, TLambdaSelfReferenceNode {
1532+
private Function callable;
1533+
1534+
LambdaSelfReferenceNode() { this = TLambdaSelfReferenceNode(callable) }
1535+
1536+
final Function getCallable() { result = callable }
1537+
1538+
override Parameter getParameter() { none() }
1539+
1540+
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
1541+
c = TFunction(callable) and
1542+
pos.isLambdaSelf()
1543+
}
1544+
1545+
override Scope getScope() { result = callable }
1546+
1547+
override Location getLocation() { result = callable.getLocation() }
1548+
1549+
override string toString() { result = "lambda self in " + callable }
1550+
}
1551+
15101552
/** A parameter for a library callable with a flow summary. */
15111553
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
15121554
SummaryParameterNode() {
@@ -1578,6 +1620,28 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
15781620
override Node getPreUpdateNode() { result = pre }
15791621
}
15801622

1623+
private class CapturePostUpdateNode extends PostUpdateNodeImpl, CaptureNode {
1624+
private CaptureNode pre;
1625+
1626+
CapturePostUpdateNode() {
1627+
VariableCapture::Flow::capturePostUpdateNode(this.getSynthesizedCaptureNode(),
1628+
pre.getSynthesizedCaptureNode())
1629+
}
1630+
1631+
override Node getPreUpdateNode() { result = pre }
1632+
}
1633+
1634+
private class CaptureArgumentNode extends CfgNode, ArgumentNode {
1635+
CallCfgNode callNode;
1636+
1637+
CaptureArgumentNode() { this = callNode.getFunction() }
1638+
1639+
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
1640+
callNode.asCfgNode() = call.getNode() and
1641+
pos.isLambdaSelf()
1642+
}
1643+
}
1644+
15811645
/** Gets a viable run-time target for the call `call`. */
15821646
DataFlowCallable viableCallable(DataFlowCall call) {
15831647
call instanceof ExtractedDataFlowCall and

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 207 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,194 @@ module LocalFlow {
378378
}
379379
}
380380

381+
/** Provides logic related to captured variables. */
382+
module VariableCapture {
383+
private import codeql.dataflow.VariableCapture as Shared
384+
385+
class ExprCfgNode extends ControlFlowNode {
386+
ExprCfgNode() { isExpressionNode(this) }
387+
}
388+
389+
private predicate closureFlowStep(ExprCfgNode e1, ExprCfgNode e2) {
390+
// simpleAstFlowStep(e1, e2)
391+
// or
392+
exists(SsaVariable def |
393+
def.getAUse() = e2 and
394+
def.getAnUltimateDefinition().getDefinition().(DefinitionNode).getValue() = e1
395+
)
396+
}
397+
398+
private module CaptureInput implements Shared::InputSig {
399+
private import python as P
400+
401+
class Location = P::Location;
402+
403+
class BasicBlock extends P::BasicBlock {
404+
Callable getEnclosingCallable() { result = this.getScope() }
405+
406+
// TODO: check that this gives useful results
407+
Location getLocation() { result = super.getNode(0).getLocation() }
408+
}
409+
410+
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) {
411+
result = bb.getImmediateDominator()
412+
}
413+
414+
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
415+
416+
class CapturedVariable extends LocalVariable {
417+
Function f;
418+
419+
CapturedVariable() {
420+
this.getScope() = f and
421+
this.getAnAccess().getScope() != f
422+
}
423+
424+
Callable getCallable() { result = f }
425+
426+
Location getLocation() { result = f.getLocation() }
427+
428+
Scope getACapturingScope() {
429+
result = this.getAnAccess().getScope().getScope*() and
430+
result.getScope+() = f
431+
}
432+
}
433+
434+
class CapturedParameter extends CapturedVariable {
435+
CapturedParameter() { this.isParameter() }
436+
437+
ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
438+
}
439+
440+
class Expr extends ExprCfgNode {
441+
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
442+
}
443+
444+
class VariableWrite extends ControlFlowNode {
445+
CapturedVariable v;
446+
447+
VariableWrite() { this = v.getAStore().getAFlowNode() }
448+
449+
CapturedVariable getVariable() { result = v }
450+
451+
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
452+
}
453+
454+
class VariableRead extends Expr {
455+
CapturedVariable v;
456+
457+
VariableRead() { this = v.getALoad().getAFlowNode() }
458+
459+
CapturedVariable getVariable() { result = v }
460+
}
461+
462+
class ClosureExpr extends Expr {
463+
ClosureExpr() {
464+
this.getNode() instanceof CallableExpr
465+
or
466+
this.getNode() instanceof Comp
467+
}
468+
469+
predicate hasBody(Callable body) {
470+
body = this.getNode().(CallableExpr).getInnerScope()
471+
or
472+
body = this.getNode().(Comp).getFunction()
473+
}
474+
475+
predicate hasAliasedAccess(Expr f) { closureFlowStep+(this, f) and not closureFlowStep(f, _) }
476+
}
477+
478+
class Callable extends Scope {
479+
predicate isConstructor() { none() }
480+
}
481+
}
482+
483+
class CapturedVariable = CaptureInput::CapturedVariable;
484+
485+
class ClosureExpr = CaptureInput::ClosureExpr;
486+
487+
module Flow = Shared::Flow<CaptureInput>;
488+
489+
private Flow::ClosureNode asClosureNode(Node n) {
490+
result = n.(CaptureNode).getSynthesizedCaptureNode()
491+
or
492+
result.(Flow::ExprNode).getExpr() = n.(CfgNode).getNode()
493+
or
494+
result.(Flow::VariableWriteSourceNode).getVariableWrite() = n.(CfgNode).getNode()
495+
or
496+
result.(Flow::ExprPostUpdateNode).getExpr() =
497+
n.(PostUpdateNode).getPreUpdateNode().(CfgNode).getNode()
498+
or
499+
result.(Flow::ParameterNode).getParameter().getCfgNode() = n.(CfgNode).getNode()
500+
or
501+
result.(Flow::ThisParameterNode).getCallable() = n.(LambdaSelfReferenceNode).getCallable()
502+
}
503+
504+
predicate storeStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
505+
Flow::storeStep(asClosureNode(nodeFrom), c.getVariable(), asClosureNode(nodeTo))
506+
}
507+
508+
predicate readStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
509+
Flow::readStep(asClosureNode(nodeFrom), c.getVariable(), asClosureNode(nodeTo))
510+
}
511+
512+
predicate valueStep(Node nodeFrom, Node nodeTo) {
513+
Flow::localFlowStep(asClosureNode(nodeFrom), asClosureNode(nodeTo))
514+
}
515+
516+
// Note: Learn from JS, https://github.com/github/codeql/pull/14412
517+
// - JS: Capture flow
518+
// - JS: Disallow consecutive captured contents
519+
private module Debug {
520+
predicate flowStoreStep(
521+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
522+
Flow::ClosureNode closureNodeTo, Node nodeTo
523+
) {
524+
closureNodeFrom = asClosureNode(nodeFrom) and
525+
closureNodeTo = asClosureNode(nodeTo) and
526+
Flow::storeStep(closureNodeFrom, v, closureNodeTo)
527+
}
528+
529+
predicate unmappedFlowStoreStep(
530+
Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
531+
) {
532+
Flow::storeStep(closureNodeFrom, v, closureNodeTo) and
533+
not flowStoreStep(_, closureNodeFrom, v, closureNodeTo, _)
534+
}
535+
536+
predicate flowReadStep(
537+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
538+
Flow::ClosureNode closureNodeTo, Node nodeTo
539+
) {
540+
closureNodeFrom = asClosureNode(nodeFrom) and
541+
closureNodeTo = asClosureNode(nodeTo) and
542+
Flow::readStep(closureNodeFrom, v, closureNodeTo)
543+
}
544+
545+
predicate unmappedFlowReadStep(
546+
Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
547+
) {
548+
Flow::readStep(closureNodeFrom, v, closureNodeTo) and
549+
not flowReadStep(_, closureNodeFrom, v, closureNodeTo, _)
550+
}
551+
552+
predicate flowValueStep(
553+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo, Node nodeTo
554+
) {
555+
closureNodeFrom = asClosureNode(nodeFrom) and
556+
closureNodeTo = asClosureNode(nodeTo) and
557+
Flow::localFlowStep(closureNodeFrom, closureNodeTo)
558+
}
559+
560+
predicate unmappedFlowValueStep(
561+
Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo
562+
) {
563+
Flow::localFlowStep(closureNodeFrom, closureNodeTo) and
564+
not flowValueStep(_, closureNodeFrom, closureNodeTo, _)
565+
}
566+
}
567+
}
568+
381569
//--------
382570
// Local flow
383571
//--------
@@ -471,6 +659,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
471659
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
472660
or
473661
summaryFlowSteps(nodeFrom, nodeTo)
662+
or
663+
variableCaptureFlowStep(nodeFrom, nodeTo)
474664
}
475665

476666
/**
@@ -494,6 +684,11 @@ predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
494684
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
495685
}
496686

687+
predicate variableCaptureFlowStep(Node nodeFrom, Node nodeTo) {
688+
IncludePostUpdateFlow<PhaseDependentFlow<VariableCapture::valueStep/2>::step/2>::step(nodeFrom,
689+
nodeTo)
690+
}
691+
497692
/** `ModuleVariable`s are accessed via jump steps at runtime. */
498693
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
499694
// Module variable read
@@ -557,7 +752,7 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
557752

558753
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
559754

560-
predicate localMustFlowStep(Node node1, Node node2) { none() }
755+
predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { none() }
561756

562757
/**
563758
* Gets the type of `node`.
@@ -661,6 +856,8 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
661856
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
662857
or
663858
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
859+
or
860+
VariableCapture::storeStep(nodeFrom, c, nodeTo)
664861
}
665862

666863
/**
@@ -864,6 +1061,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
8641061
nodeTo.(FlowSummaryNode).getSummaryNode())
8651062
or
8661063
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
1064+
or
1065+
VariableCapture::readStep(nodeFrom, c, nodeTo)
8671066
}
8681067

8691068
/** Data flows from a sequence to a subscript of the sequence. */
@@ -993,6 +1192,8 @@ predicate nodeIsHidden(Node n) {
9931192
n instanceof SynthDictSplatArgumentNode
9941193
or
9951194
n instanceof SynthDictSplatParameterNode
1195+
// or
1196+
// n instanceof CaptureNode
9961197
}
9971198

9981199
class LambdaCallKind = Unit;
@@ -1029,6 +1230,11 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
10291230
*/
10301231
predicate allowParameterReturnInSelf(ParameterNode p) {
10311232
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
1233+
or
1234+
exists(Function f |
1235+
VariableCapture::Flow::heuristicAllowInstanceParameterReturnInSelf(f) and
1236+
p = TLambdaSelfReferenceNode(f)
1237+
)
10321238
}
10331239

10341240
/** An approximated `Content`. */

0 commit comments

Comments
 (0)