Skip to content

[analyzer] Don't assume third iteration in loops #119388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,13 @@ New features
Crash and bug fixes
^^^^^^^^^^^^^^^^^^^

- In loops where the loop condition is opaque (i.e. the analyzer cannot
determine whether it's true or false), the analyzer will no longer assume
execution paths that perform more that two iterations. These unjustified
assumptions caused false positive reports (e.g. 100+ out-of-bounds reports in
the FFMPEG codebase) in loops where the programmer intended only two or three
steps but the analyzer wasn't able to understand that the loop is limited.

Improvements
^^^^^^^^^^^^

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ class CoreEngine {
ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
const ReturnStmt *RS);

/// Helper function called by `HandleBranch()`. If the currently handled
/// branch corresponds to a loop, this returns the number of already
/// completed iterations in that loop, otherwise the return value is
/// `std::nullopt`. Note that this counts _all_ earlier iterations, including
/// ones that were performed within an earlier iteration of an outer loop.
std::optional<unsigned> getCompletedIterationCount(const CFGBlock *B,
ExplodedNode *Pred) const;

public:
/// Construct a CoreEngine object to analyze the provided CFG.
CoreEngine(ExprEngine &exprengine,
Expand Down
18 changes: 10 additions & 8 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,14 +321,14 @@ class ExprEngine {
NodeBuilderWithSinks &nodeBuilder,
ExplodedNode *Pred);

/// ProcessBranch - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a branch condition.
void processBranch(const Stmt *Condition,
NodeBuilderContext& BuilderCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF);
/// ProcessBranch - Called by CoreEngine. Used to generate successor nodes by
/// processing the 'effects' of a branch condition. If the branch condition
/// is a loop condition, IterationsCompletedInLoop is the number of completed
/// iterations (otherwise it's std::nullopt).
void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
ExplodedNode *Pred, ExplodedNodeSet &Dst,
const CFGBlock *DstT, const CFGBlock *DstF,
std::optional<unsigned> IterationsCompletedInLoop);

/// Called by CoreEngine.
/// Used to generate successor nodes for temporary destructors depending
Expand Down Expand Up @@ -588,6 +588,8 @@ class ExprEngine {
void evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
const Expr *Ex);

bool didEagerlyAssumeBifurcateAt(ProgramStateRef State, const Expr *Ex) const;

static std::pair<const ProgramPointTag *, const ProgramPointTag *>
getEagerlyAssumeBifurcationTags();

Expand Down
27 changes: 26 additions & 1 deletion clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,8 @@ void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
NodeBuilderContext Ctx(*this, B, Pred);
ExplodedNodeSet Dst;
ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
*(B->succ_begin() + 1));
*(B->succ_begin() + 1),
getCompletedIterationCount(B, Pred));
// Enqueue the new frontier onto the worklist.
enqueue(Dst);
}
Expand Down Expand Up @@ -591,6 +592,30 @@ ExplodedNode *CoreEngine::generateCallExitBeginNode(ExplodedNode *N,
return isNew ? Node : nullptr;
}

std::optional<unsigned>
CoreEngine::getCompletedIterationCount(const CFGBlock *B,
ExplodedNode *Pred) const {
const LocationContext *LC = Pred->getLocationContext();
BlockCounter Counter = WList->getBlockCounter();
unsigned BlockCount =
Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());

const Stmt *Term = B->getTerminatorStmt();
if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
assert(BlockCount >= 1 &&
"Block count of currently analyzed block must be >= 1");
return BlockCount - 1;
}
if (isa<DoStmt>(Term)) {
// In a do-while loop one iteration happens before the first evaluation of
// the loop condition, so we don't subtract one.
return BlockCount;
}
// ObjCForCollectionStmt is skipped intentionally because the current
// application of the iteration counts is not relevant for it.
return std::nullopt;
}

void CoreEngine::enqueue(ExplodedNodeSet &Set) {
for (const auto I : Set)
WList->enqueue(I);
Expand Down
58 changes: 50 additions & 8 deletions clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2753,12 +2753,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) {
return State->assume(V);
}

void ExprEngine::processBranch(const Stmt *Condition,
NodeBuilderContext& BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
void ExprEngine::processBranch(
const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
std::optional<unsigned> IterationsCompletedInLoop) {
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
"CXXBindTemporaryExprs are handled by processBindTemporary.");
const LocationContext *LCtx = Pred->getLocationContext();
Expand Down Expand Up @@ -2801,8 +2799,35 @@ void ExprEngine::processBranch(const Stmt *Condition,
if (StTrue && StFalse)
assert(!isa<ObjCForCollectionStmt>(Condition));

if (StTrue)
Builder.generateNode(StTrue, true, PredN);
if (StTrue) {
// If we are processing a loop condition where two iterations have
// already been completed and the false branch is also feasible, then
// don't assume a third iteration because it is a redundant execution
// path (unlikely to be different from earlier loop exits) and can cause
// false positives if e.g. the loop iterates over a two-element structure
// with an opaque condition.
//
// The iteration count "2" is hardcoded because it's the natural limit:
// * the fact that the programmer wrote a loop (and not just an `if`)
// implies that they thought that the loop body might be executed twice;
// * however, there are situations where the programmer knows that there
// are at most two iterations but writes a loop that appears to be
// generic, because there is no special syntax for "loop with at most
// two iterations". (This pattern is common in FFMPEG and appears in
// many other projects as well.)
bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
bool FalseAlsoFeasible =
StFalse ||
didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;

// FIXME: This "don't assume third iteration" heuristic partially
// conflicts with the widen-loop analysis option (which is off by
// default). If we intend to support and stabilize the loop widening,
// we must ensure that it 'plays nicely' with this logic.
if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops)
Builder.generateNode(StTrue, true, PredN);
}

if (StFalse)
Builder.generateNode(StFalse, false, PredN);
Expand Down Expand Up @@ -3724,6 +3749,12 @@ ExprEngine::getEagerlyAssumeBifurcationTags() {
return std::make_pair(&TrueTag, &FalseTag);
}

/// If the last EagerlyAssume attempt was successful (i.e. the true and false
/// cases were both feasible), this state trait stores the expression where it
/// happened; otherwise this holds nullptr.
REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeExprIfSuccessful,
const Expr *)

void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
ExplodedNodeSet &Src,
const Expr *Ex) {
Expand All @@ -3739,13 +3770,19 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
}

ProgramStateRef State = Pred->getState();
State = State->set<LastEagerlyAssumeExprIfSuccessful>(nullptr);
SVal V = State->getSVal(Ex, Pred->getLocationContext());
std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
const auto &[TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags();

auto [StateTrue, StateFalse] = State->assume(*SEV);

if (StateTrue && StateFalse) {
StateTrue = StateTrue->set<LastEagerlyAssumeExprIfSuccessful>(Ex);
StateFalse = StateFalse->set<LastEagerlyAssumeExprIfSuccessful>(Ex);
}

// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
Expand All @@ -3763,6 +3800,11 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
}
}

bool ExprEngine::didEagerlyAssumeBifurcateAt(ProgramStateRef State,
const Expr *Ex) const {
return Ex && State->get<LastEagerlyAssumeExprIfSuccessful>() == Ex;
}

void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Expand Down
Loading
Loading