From 8c4dbca23c895b28fab9f256ce0c2b042b7174f6 Mon Sep 17 00:00:00 2001 From: Vasco-jofra <11303847+Vasco-jofra@users.noreply.github.com> Date: Sun, 15 Jun 2025 17:59:49 +0200 Subject: [PATCH 1/2] Improve data flow in the async library --- .../dataflow/internal/FlowSummaryPrivate.qll | 2 + .../javascript/frameworks/AsyncPackage.qll | 147 +++++++++++++----- .../AsyncPackage/AsyncTaintTracking.expected | 32 ++-- .../frameworks/AsyncPackage/map.js | 13 ++ .../frameworks/AsyncPackage/waterfall.js | 10 +- 5 files changed, 158 insertions(+), 46 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll index 31f5f16bbfb1..6315b34b0a4f 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll @@ -94,6 +94,8 @@ private string encodeContentAux(ContentSet cs, string arg) { cs = ContentSet::iteratorElement() and result = "IteratorElement" or cs = ContentSet::iteratorError() and result = "IteratorError" + or + cs = ContentSet::anyProperty() and result = "AnyMember" ) or cs = getPromiseContent(arg) and diff --git a/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll b/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll index 4dc60d447653..eacc69585ed5 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll @@ -15,14 +15,15 @@ module AsyncPackage { } /** - * Gets a reference to the given member or one of its `Limit` or `Series` variants. + * Gets `Limit` or `Series` name variants for a given member name. * - * For example, `memberVariant("map")` finds references to `map`, `mapLimit`, and `mapSeries`. + * For example, `memberNameVariant("map")` returns `map`, `mapLimit`, and `mapSeries`. */ - DataFlow::SourceNode memberVariant(string name) { - result = member(name) or - result = member(name + "Limit") or - result = member(name + "Series") + bindingset[name] + string memberNameVariant(string name) { + result = name or + result = name + "Limit" or + result = name + "Series" } /** @@ -101,22 +102,47 @@ module AsyncPackage { */ class IterationCall extends DataFlow::InvokeNode { string name; + int iteratorCallbackIndex; + int finalCallbackIndex; IterationCall() { - this = memberVariant(name).getACall() and - name = - [ - "concat", "detect", "each", "eachOf", "forEach", "forEachOf", "every", "filter", - "groupBy", "map", "mapValues", "reduce", "reduceRight", "reject", "some", "sortBy", - "transform" - ] + ( + ( + name = + memberNameVariant([ + "concat", "detect", "each", "eachOf", "forEach", "forEachOf", "every", "filter", + "groupBy", "map", "mapValues", "reject", "some", "sortBy", + ]) and + if name.matches("%Limit") + then ( + iteratorCallbackIndex = 2 and finalCallbackIndex = 3 + ) else ( + iteratorCallbackIndex = 1 and finalCallbackIndex = 2 + ) + ) + or + name = ["reduce", "reduceRight", "transform"] and + iteratorCallbackIndex = 2 and + finalCallbackIndex = 3 + ) and + this = member(name).getACall() } /** - * Gets the name of the iteration call, without the `Limit` or `Series` suffix. + * Gets the name of the iteration call */ string getName() { result = name } + /** + * Gets the iterator callback index + */ + int getIteratorCallbackIndex() { result = iteratorCallbackIndex } + + /** + * Gets the final callback index + */ + int getFinalCallbackIndex() { result = finalCallbackIndex } + /** * Gets the node holding the collection being iterated over. */ @@ -125,26 +151,73 @@ module AsyncPackage { /** * Gets the node holding the function being called for each element in the collection. */ - DataFlow::Node getIteratorCallback() { result = this.getArgument(this.getNumArgument() - 2) } + DataFlow::FunctionNode getIteratorCallback() { + result = this.getCallback(iteratorCallbackIndex) + } /** - * Gets the node holding the function being invoked after iteration is complete. + * Gets the node holding the function being invoked after iteration is complete. (may not exist) */ - DataFlow::Node getFinalCallback() { result = this.getArgument(this.getNumArgument() - 1) } + DataFlow::FunctionNode getFinalCallback() { result = this.getCallback(finalCallbackIndex) } + } + + /** + * An IterationCall with its iterator callback at index 1 + */ + private class IterationCallCallbacksFirstArg extends IterationCall { + IterationCallCallbacksFirstArg() { this.getIteratorCallbackIndex() = 1 } + } + + /** + * An IterationCall with its iterator callback at index 2 + */ + private class IterationCallCallbacksSecondArg extends IterationCall { + IterationCallCallbacksSecondArg() { this.getIteratorCallbackIndex() = 2 } + } + + /** + * The model with the iteratorCallbackIndex abstracted + */ + bindingset[iteratorCallbackIndex] + private predicate iterationCallPropagatesFlow( + string input, string output, boolean preservesValue, int iteratorCallbackIndex + ) { + preservesValue = true and + input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement", "AnyMember"] and + output = "Argument[" + iteratorCallbackIndex + "].Parameter[0]" } /** - * A taint step from the collection into the iterator callback of an iteration call. + * A taint step from the collection into the iterator callback (at index 1) of an iteration call. * * For example: `data -> item` in `async.each(data, (item, cb) => {})`. */ - private class IterationInputTaintStep extends TaintTracking::SharedTaintStep { - override predicate step(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::FunctionNode iteratee, IterationCall call | - iteratee = call.getIteratorCallback() and // Require a closure to avoid spurious call/return mismatch. - pred = call.getCollection() and // TODO: needs a flow summary to ensure ArrayElement content is unfolded - succ = iteratee.getParameter(0) - ) + class IterationCallCallbacksFirstArgFlowSummary extends DataFlow::SummarizedCallable { + IterationCallCallbacksFirstArgFlowSummary() { this = "async.[IterationCallCallbacksFirstArg]" } + + override DataFlow::InvokeNode getACallSimple() { + result instanceof IterationCallCallbacksFirstArg + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + iterationCallPropagatesFlow(input, output, preservesValue, 1) + } + } + + /** + * A taint step from the collection into the iterator callback (at index 2) of an iteration call. + * + * For example: `data -> item` in `async.eachLimit(data, 1, (item, cb) => {})`. + */ + class IterationCallCallbacksSecondArgFlowSummary extends DataFlow::SummarizedCallable { + IterationCallCallbacksSecondArgFlowSummary() { this = "async.[IterationCallCallbackSecondArg]" } + + override DataFlow::InvokeNode getACallSimple() { + result instanceof IterationCallCallbacksSecondArg + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + iterationCallPropagatesFlow(input, output, preservesValue, 2) } } @@ -152,14 +225,14 @@ module AsyncPackage { * A taint step from the return value of an iterator callback to the result of the iteration * call. * - * For example: `item + taint()` -> result` in `async.map(data, (item, cb) => cb(null, item + taint()), (err, result) => {})`. + * For example: `item + taint() -> result` in `async.map(data, (item, cb) => cb(null, item + taint()), (err, result) => {})`. */ private class IterationOutputTaintStep extends TaintTracking::SharedTaintStep { override predicate step(DataFlow::Node pred, DataFlow::Node succ) { exists( DataFlow::FunctionNode iteratee, DataFlow::FunctionNode final, int i, IterationCall call | - iteratee = call.getIteratorCallback().getALocalSource() and + iteratee = call.getIteratorCallback() and final = call.getFinalCallback() and // Require a closure to avoid spurious call/return mismatch. pred = getLastParameter(iteratee).getACall().getArgument(i) and succ = final.getParameter(i) and @@ -175,14 +248,18 @@ module AsyncPackage { * * For example: `data -> result` in `async.sortBy(data, orderingFn, (err, result) => {})`. */ - private class IterationPreserveTaintStep extends TaintTracking::SharedTaintStep { - override predicate step(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::FunctionNode final, IterationCall call | - final = call.getFinalCallback() and // Require a closure to avoid spurious call/return mismatch. - pred = call.getCollection() and - succ = final.getParameter(1) and - call.getName() = "sortBy" - ) + class IterationPreserveTaintStepFlowSummary extends DataFlow::SummarizedCallable { + IterationPreserveTaintStepFlowSummary() { this = "async.sortBy" } + + override DataFlow::InvokeNode getACallSimple() { + result instanceof IterationCall and + result.(IterationCall).getName() = "sortBy" + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + preservesValue = false and + input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement", "AnyMember"] and + output = "Argument[2].Parameter[1]" } } } diff --git a/javascript/ql/test/library-tests/frameworks/AsyncPackage/AsyncTaintTracking.expected b/javascript/ql/test/library-tests/frameworks/AsyncPackage/AsyncTaintTracking.expected index 168f5ec5ace4..95ee8fe452b8 100644 --- a/javascript/ql/test/library-tests/frameworks/AsyncPackage/AsyncTaintTracking.expected +++ b/javascript/ql/test/library-tests/frameworks/AsyncPackage/AsyncTaintTracking.expected @@ -1,12 +1,24 @@ legacyDataFlowDifference -| each.js:11:9:11:16 | source() | each.js:13:12:13:15 | item | only flow with OLD data flow library | -| map.js:10:13:10:20 | source() | map.js:12:14:12:17 | item | only flow with OLD data flow library | -| map.js:26:13:26:20 | source() | map.js:28:27:28:32 | result | only flow with OLD data flow library | -| sortBy.js:10:22:10:29 | source() | sortBy.js:12:27:12:32 | result | only flow with OLD data flow library | +| each.js:11:9:11:16 | source() | each.js:13:12:13:15 | item | only flow with NEW data flow library | +| map.js:14:13:14:20 | source() | map.js:16:14:16:17 | item | only flow with NEW data flow library | +| map.js:30:13:30:20 | source() | map.js:32:27:32:32 | result | only flow with NEW data flow library | +| map.js:40:13:40:20 | source() | map.js:11:10:11:10 | x | only flow with NEW data flow library | +| map.js:42:12:42:19 | source() | map.js:11:10:11:10 | x | only flow with NEW data flow library | +| map.js:44:16:44:23 | source() | map.js:11:10:11:10 | x | only flow with NEW data flow library | +| map.js:46:18:46:25 | source() | map.js:11:10:11:10 | x | only flow with NEW data flow library | +| sortBy.js:10:22:10:29 | source() | sortBy.js:12:27:12:32 | result | only flow with NEW data flow library | #select -| map.js:20:19:20:26 | source() | map.js:23:27:23:32 | result | -| waterfall.js:8:30:8:37 | source() | waterfall.js:11:12:11:16 | taint | -| waterfall.js:8:30:8:37 | source() | waterfall.js:20:10:20:14 | taint | -| waterfall.js:28:18:28:25 | source() | waterfall.js:39:10:39:12 | err | -| waterfall.js:46:22:46:29 | source() | waterfall.js:49:12:49:16 | taint | -| waterfall.js:46:22:46:29 | source() | waterfall.js:55:10:55:14 | taint | +| each.js:11:9:11:16 | source() | each.js:13:12:13:15 | item | +| map.js:14:13:14:20 | source() | map.js:16:14:16:17 | item | +| map.js:24:19:24:26 | source() | map.js:27:27:27:32 | result | +| map.js:30:13:30:20 | source() | map.js:32:27:32:32 | result | +| map.js:40:13:40:20 | source() | map.js:11:10:11:10 | x | +| map.js:42:12:42:19 | source() | map.js:11:10:11:10 | x | +| map.js:44:16:44:23 | source() | map.js:11:10:11:10 | x | +| map.js:46:18:46:25 | source() | map.js:11:10:11:10 | x | +| sortBy.js:10:22:10:29 | source() | sortBy.js:12:27:12:32 | result | +| waterfall.js:16:30:16:37 | source() | waterfall.js:19:12:19:16 | taint | +| waterfall.js:16:30:16:37 | source() | waterfall.js:28:10:28:14 | taint | +| waterfall.js:36:18:36:25 | source() | waterfall.js:47:10:47:12 | err | +| waterfall.js:54:22:54:29 | source() | waterfall.js:57:12:57:16 | taint | +| waterfall.js:54:22:54:29 | source() | waterfall.js:63:10:63:14 | taint | diff --git a/javascript/ql/test/library-tests/frameworks/AsyncPackage/map.js b/javascript/ql/test/library-tests/frameworks/AsyncPackage/map.js index ed7e64b01fae..b1e9ecc883b6 100644 --- a/javascript/ql/test/library-tests/frameworks/AsyncPackage/map.js +++ b/javascript/ql/test/library-tests/frameworks/AsyncPackage/map.js @@ -7,6 +7,10 @@ function sink(x) { console.log(x) } +function call_sink(x) { + sink(x) +} + async_.map([source()], (item, cb) => { sink(item), // NOT OK @@ -32,3 +36,12 @@ async_.map(['safe'], (item, cb) => cb(null, item), (err, result) => sink(result) // OK ); + +async_.map([source()], call_sink) // NOT OK + +async_.map(source().prop, call_sink) // NOT OK + +async_.map({a: source()}, call_sink) // NOT OK + +async_.mapLimit([source()], 1, call_sink) // NOT OK + diff --git a/javascript/ql/test/library-tests/frameworks/AsyncPackage/waterfall.js b/javascript/ql/test/library-tests/frameworks/AsyncPackage/waterfall.js index 439ac48674a6..8554d048d988 100644 --- a/javascript/ql/test/library-tests/frameworks/AsyncPackage/waterfall.js +++ b/javascript/ql/test/library-tests/frameworks/AsyncPackage/waterfall.js @@ -1,7 +1,15 @@ let async_ = require('async'); let waterfall = require('a-sync-waterfall'); -var source, sink, somethingWrong; +function source() { + return 'TAINT' +} + +function sink(x) { + console.log(x) +} + +var somethingWrong; async_.waterfall([ function(callback) { From 575da5c31c8909a99a64dc20571bb7c23aa22635 Mon Sep 17 00:00:00 2001 From: Vasco-jofra <11303847+Vasco-jofra@users.noreply.github.com> Date: Thu, 26 Jun 2025 10:10:52 +0200 Subject: [PATCH 2/2] Merge SummarizedCallable into single class --- .../javascript/frameworks/AsyncPackage.qll | 75 ++++++------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll b/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll index eacc69585ed5..db2487ce46a0 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/AsyncPackage.qll @@ -14,13 +14,24 @@ module AsyncPackage { result = DataFlow::moduleMember("async-es", name) } + /** + * Gets a reference to the given member or one of its `Limit` or `Series` variants. + * + * For example, `memberVariant("map")` finds references to `map`, `mapLimit`, and `mapSeries`. + */ + DataFlow::SourceNode memberVariant(string name) { + result = member(name) or + result = member(name + "Limit") or + result = member(name + "Series") + } + /** * Gets `Limit` or `Series` name variants for a given member name. * * For example, `memberNameVariant("map")` returns `map`, `mapLimit`, and `mapSeries`. */ bindingset[name] - string memberNameVariant(string name) { + private string memberNameVariant(string name) { result = name or result = name + "Limit" or result = name + "Series" @@ -161,63 +172,23 @@ module AsyncPackage { DataFlow::FunctionNode getFinalCallback() { result = this.getCallback(finalCallbackIndex) } } - /** - * An IterationCall with its iterator callback at index 1 - */ - private class IterationCallCallbacksFirstArg extends IterationCall { - IterationCallCallbacksFirstArg() { this.getIteratorCallbackIndex() = 1 } - } - - /** - * An IterationCall with its iterator callback at index 2 - */ - private class IterationCallCallbacksSecondArg extends IterationCall { - IterationCallCallbacksSecondArg() { this.getIteratorCallbackIndex() = 2 } - } - - /** - * The model with the iteratorCallbackIndex abstracted - */ - bindingset[iteratorCallbackIndex] - private predicate iterationCallPropagatesFlow( - string input, string output, boolean preservesValue, int iteratorCallbackIndex - ) { - preservesValue = true and - input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement", "AnyMember"] and - output = "Argument[" + iteratorCallbackIndex + "].Parameter[0]" - } + private class IterationCallFlowSummary extends DataFlow::SummarizedCallable { + private int callbackArgIndex; - /** - * A taint step from the collection into the iterator callback (at index 1) of an iteration call. - * - * For example: `data -> item` in `async.each(data, (item, cb) => {})`. - */ - class IterationCallCallbacksFirstArgFlowSummary extends DataFlow::SummarizedCallable { - IterationCallCallbacksFirstArgFlowSummary() { this = "async.[IterationCallCallbacksFirstArg]" } - - override DataFlow::InvokeNode getACallSimple() { - result instanceof IterationCallCallbacksFirstArg + IterationCallFlowSummary() { + this = "async.IteratorCall(callbackArgIndex=" + callbackArgIndex + ")" and + callbackArgIndex in [1 .. 3] } - override predicate propagatesFlow(string input, string output, boolean preservesValue) { - iterationCallPropagatesFlow(input, output, preservesValue, 1) - } - } - - /** - * A taint step from the collection into the iterator callback (at index 2) of an iteration call. - * - * For example: `data -> item` in `async.eachLimit(data, 1, (item, cb) => {})`. - */ - class IterationCallCallbacksSecondArgFlowSummary extends DataFlow::SummarizedCallable { - IterationCallCallbacksSecondArgFlowSummary() { this = "async.[IterationCallCallbackSecondArg]" } - override DataFlow::InvokeNode getACallSimple() { - result instanceof IterationCallCallbacksSecondArg + result instanceof IterationCall and + result.(IterationCall).getIteratorCallbackIndex() = callbackArgIndex } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - iterationCallPropagatesFlow(input, output, preservesValue, 2) + preservesValue = true and + input = "Argument[0]." + ["ArrayElement", "SetElement", "IteratorElement", "AnyMember"] and + output = "Argument[" + callbackArgIndex + "].Parameter[0]" } } @@ -248,7 +219,7 @@ module AsyncPackage { * * For example: `data -> result` in `async.sortBy(data, orderingFn, (err, result) => {})`. */ - class IterationPreserveTaintStepFlowSummary extends DataFlow::SummarizedCallable { + private class IterationPreserveTaintStepFlowSummary extends DataFlow::SummarizedCallable { IterationPreserveTaintStepFlowSummary() { this = "async.sortBy" } override DataFlow::InvokeNode getACallSimple() {