Skip to content

fix: LimitPushdown rule incorrectly removes some GlobalLimitExec #14245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2217,11 +2217,6 @@ async fn write_parquet_with_order() -> Result<()> {
let df = ctx.sql("SELECT * FROM data").await?;
let results = df.collect().await?;

let df_explain = ctx.sql("explain SELECT a FROM data").await?;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Removed the leftover debug print of the explain result.

let explain_result = df_explain.collect().await?;

println!("explain_result {:?}", explain_result);

assert_batches_eq!(
&[
"+---+---+",
Expand Down
11 changes: 9 additions & 2 deletions datafusion/physical-optimizer/src/limit_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from
/// the parent to the child if applicable.
#[derive(Default, Debug)]
Expand Down Expand Up @@ -248,7 +247,15 @@ pub fn pushdown_limit_helper(
}
} else {
// Add fetch or a `LimitExec`:
global_state.satisfied = true;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the original logic, which unconditionally set the state back to true.

// If any of the plan's children has its own limit (which may be smaller than the parent's limit), we must not mark the global state as satisfied,
// because the child's limit would be overridden if the global state were changed.
if !pushdown_plan
.children()
.iter()
.any(|&child| extract_limit(child).is_some())
{
global_state.satisfied = true;
}
pushdown_plan = if let Some(plan_with_fetch) = maybe_fetchable {
if global_skip > 0 {
add_global_limit(plan_with_fetch, global_skip, Some(global_fetch))
Expand Down
61 changes: 61 additions & 0 deletions datafusion/sqllogictest/test_files/limit.slt
Original file line number Diff line number Diff line change
Expand Up @@ -711,3 +711,64 @@ OFFSET 3 LIMIT 2;

statement ok
drop table ordered_table;

# Test issue: https://github.com/apache/datafusion/issues/14204
# Test limit pushdown with subquery
statement ok
create table testSubQueryLimit (a int, b int) as values (1,2), (2,3), (3,4);

query IIII
select * from testSubQueryLimit as t1 join (select * from testSubQueryLimit limit 1) limit 10;
----
1 2 1 2
2 3 1 2
3 4 1 2

query TT
explain select * from testSubQueryLimit as t1 join (select * from testSubQueryLimit limit 1) limit 10;
----
logical_plan
01)Limit: skip=0, fetch=10
02)--Cross Join:
03)----SubqueryAlias: t1
04)------Limit: skip=0, fetch=10
05)--------TableScan: testsubquerylimit projection=[a, b], fetch=10
06)----Limit: skip=0, fetch=1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this plan looks good to me -- the Limit 1 is still here

07)------TableScan: testsubquerylimit projection=[a, b], fetch=1
physical_plan
01)ProjectionExec: expr=[a@2 as a, b@3 as b, a@0 as a, b@1 as b]
02)--GlobalLimitExec: skip=0, fetch=10
03)----CrossJoinExec
04)------GlobalLimitExec: skip=0, fetch=1
05)--------MemoryExec: partitions=1, partition_sizes=[1]
06)------GlobalLimitExec: skip=0, fetch=10
07)--------MemoryExec: partitions=1, partition_sizes=[1]


query IIII
select * from testSubQueryLimit as t1 join (select * from testSubQueryLimit limit 10) limit 2;
----
1 2 1 2
1 2 2 3

query TT
explain select * from testSubQueryLimit as t1 join (select * from testSubQueryLimit limit 10) limit 2;
----
logical_plan
01)Limit: skip=0, fetch=2
02)--Cross Join:
03)----SubqueryAlias: t1
04)------Limit: skip=0, fetch=2
05)--------TableScan: testsubquerylimit projection=[a, b], fetch=2
06)----Limit: skip=0, fetch=2
07)------TableScan: testsubquerylimit projection=[a, b], fetch=2
physical_plan
01)GlobalLimitExec: skip=0, fetch=2
02)--CrossJoinExec
03)----GlobalLimitExec: skip=0, fetch=2
04)------MemoryExec: partitions=1, partition_sizes=[1]
05)----GlobalLimitExec: skip=0, fetch=2
06)------MemoryExec: partitions=1, partition_sizes=[1]

statement ok
drop table testSubQueryLimit;