Skip to content

Commit f6f35c6

Browse files
committed
feat: change Expr Alias and OuterReferenceColumn to Box type for reducing Expr size
1 parent 8d7b11b commit f6f35c6

File tree

33 files changed

+245
-207
lines changed

33 files changed

+245
-207
lines changed

datafusion/catalog-listing/src/helpers.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool {
6363
}
6464
Expr::Literal(_, _)
6565
| Expr::Alias(_)
66-
| Expr::OuterReferenceColumn(_, _)
66+
| Expr::OuterReferenceColumn(_)
6767
| Expr::ScalarVariable(_, _)
6868
| Expr::Not(_)
6969
| Expr::IsNotNull(_)

datafusion/core/src/dataframe/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,15 +2221,15 @@ impl DataFrame {
22212221
if cols.contains(field) {
22222222
// Try to cast fill value to column type. If the cast fails, fallback to the original column.
22232223
match value.clone().cast_to(field.data_type()) {
2224-
Ok(fill_value) => Expr::Alias(Alias {
2224+
Ok(fill_value) => Expr::Alias(Box::new(Alias {
22252225
expr: Box::new(Expr::ScalarFunction(ScalarFunction {
22262226
func: coalesce(),
22272227
args: vec![col(field.name()), lit(fill_value)],
22282228
})),
22292229
relation: None,
22302230
name: field.name().to_string(),
22312231
metadata: None,
2232-
}),
2232+
})),
22332233
Err(_) => col(field.name()),
22342234
}
22352235
} else {

datafusion/core/src/physical_planner.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ use datafusion_datasource::file_groups::FileGroup;
7272
use datafusion_datasource::memory::MemorySourceConfig;
7373
use datafusion_expr::dml::{CopyTo, InsertOp};
7474
use datafusion_expr::expr::{
75-
physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet,
75+
physical_name, AggregateFunction, AggregateFunctionParams, GroupingSet,
7676
WindowFunction, WindowFunctionParams,
7777
};
7878
use datafusion_expr::expr_rewriter::unnormalize_cols;
@@ -595,9 +595,9 @@ impl DefaultPhysicalPlanner {
595595
} = &window_fun.as_ref().params;
596596
generate_sort_key(partition_by, order_by)
597597
}
598-
Expr::Alias(Alias { expr, .. }) => {
598+
Expr::Alias(boxed_alias) => {
599599
// Convert &Box<T> to &T
600-
match &**expr {
600+
match boxed_alias.expr.as_ref() {
601601
Expr::WindowFunction(window_fun) => {
602602
let WindowFunctionParams {
603603
ref partition_by,
@@ -1681,7 +1681,7 @@ pub fn create_window_expr(
16811681
) -> Result<Arc<dyn WindowExpr>> {
16821682
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
16831683
let (name, e) = match e {
1684-
Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()),
1684+
Expr::Alias(boxed_alias) => (boxed_alias.name.clone(), boxed_alias.expr.as_ref()),
16851685
_ => (e.schema_name().to_string(), e),
16861686
};
16871687
create_window_expr_with_name(e, name, logical_schema, execution_props)
@@ -1772,9 +1772,11 @@ pub fn create_aggregate_expr_and_maybe_filter(
17721772
) -> Result<AggregateExprWithOptionalArgs> {
17731773
// unpack (nested) aliased logical expressions, e.g. "sum(col) as total"
17741774
let (name, human_display, e) = match e {
1775-
Expr::Alias(Alias { expr, name, .. }) => {
1776-
(Some(name.clone()), String::default(), expr.as_ref())
1777-
}
1775+
Expr::Alias(boxed_alias) => (
1776+
Some(boxed_alias.name.clone()),
1777+
String::default(),
1778+
boxed_alias.expr.as_ref(),
1779+
),
17781780
Expr::AggregateFunction(_) => (
17791781
Some(e.schema_name().to_string()),
17801782
e.human_display().to_string(),

datafusion/core/tests/dataframe/dataframe_functions.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,11 +415,11 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
415415
");
416416

417417
// the arg2 parameter is a complex expr, but it can be evaluated to the literal value
418-
let alias_expr = Expr::Alias(Alias::new(
418+
let alias_expr = Expr::Alias(Box::new(Alias::new(
419419
cast(lit(0.5), DataType::Float32),
420420
None::<&str>,
421421
"arg_2".to_string(),
422-
));
422+
)));
423423
let expr = approx_percentile_cont(col("b").sort(true, false), alias_expr, None);
424424
let df = create_test_table().await?;
425425
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
@@ -435,11 +435,11 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
435435
"
436436
);
437437

438-
let alias_expr = Expr::Alias(Alias::new(
438+
let alias_expr = Expr::Alias(Box::new(Alias::new(
439439
cast(lit(0.1), DataType::Float32),
440440
None::<&str>,
441441
"arg_2".to_string(),
442-
));
442+
)));
443443
let expr = approx_percentile_cont(col("b").sort(false, false), alias_expr, None);
444444
let df = create_test_table().await?;
445445
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;

datafusion/core/tests/user_defined/expr_planner.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ impl ExprPlanner for MyCustomPlanner {
5555
})))
5656
}
5757
BinaryOperator::Question => {
58-
Ok(PlannerResult::Planned(Expr::Alias(Alias::new(
58+
Ok(PlannerResult::Planned(Expr::Alias(Box::new(Alias::new(
5959
Expr::Literal(ScalarValue::Boolean(Some(true)), None),
6060
None::<&str>,
6161
format!("{} ? {}", expr.left, expr.right),
62-
))))
62+
)))))
6363
}
6464
_ => Ok(PlannerResult::Original(expr)),
6565
}

datafusion/expr/src/expr.rs

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ use sqlparser::ast::{
279279
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
280280
pub enum Expr {
281281
/// An expression with a specific name.
282-
Alias(Alias),
282+
Alias(Box<Alias>),
283283
/// A named reference to a qualified field in a schema.
284284
Column(Column),
285285
/// A named reference to a variable in a registry.
@@ -362,7 +362,7 @@ pub enum Expr {
362362
Placeholder(Placeholder),
363363
/// A placeholder which holds a reference to a qualified field
364364
/// in the outer query, used for correlated sub queries.
365-
OuterReferenceColumn(DataType, Column),
365+
OuterReferenceColumn(Box<(DataType, Column)>),
366366
/// Unnest expression
367367
Unnest(Unnest),
368368
}
@@ -1421,7 +1421,9 @@ impl Expr {
14211421
name,
14221422
spans: _,
14231423
}) => (relation.clone(), name.clone()),
1424-
Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1424+
Expr::Alias(boxed_alias) => {
1425+
(boxed_alias.relation.clone(), boxed_alias.name.clone())
1426+
}
14251427
_ => (None, self.schema_name().to_string()),
14261428
}
14271429
}
@@ -1443,7 +1445,7 @@ impl Expr {
14431445
Expr::Case { .. } => "Case",
14441446
Expr::Cast { .. } => "Cast",
14451447
Expr::Column(..) => "Column",
1446-
Expr::OuterReferenceColumn(_, _) => "Outer",
1448+
Expr::OuterReferenceColumn(_) => "Outer",
14471449
Expr::Exists { .. } => "Exists",
14481450
Expr::GroupingSet(..) => "GroupingSet",
14491451
Expr::InList { .. } => "InList",
@@ -1569,7 +1571,7 @@ impl Expr {
15691571

15701572
/// Return `self AS name` alias expression
15711573
pub fn alias(self, name: impl Into<String>) -> Expr {
1572-
Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1574+
Expr::Alias(Box::new(Alias::new(self, None::<&str>, name.into())))
15731575
}
15741576

15751577
/// Return `self AS name` alias expression with metadata
@@ -1592,7 +1594,9 @@ impl Expr {
15921594
name: impl Into<String>,
15931595
metadata: Option<FieldMetadata>,
15941596
) -> Expr {
1595-
Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1597+
Expr::Alias(Box::new(
1598+
Alias::new(self, None::<&str>, name.into()).with_metadata(metadata),
1599+
))
15961600
}
15971601

15981602
/// Return `self AS name` alias expression with a specific qualifier
@@ -1601,7 +1605,7 @@ impl Expr {
16011605
relation: Option<impl Into<TableReference>>,
16021606
name: impl Into<String>,
16031607
) -> Expr {
1604-
Expr::Alias(Alias::new(self, relation, name.into()))
1608+
Expr::Alias(Box::new(Alias::new(self, relation, name.into())))
16051609
}
16061610

16071611
/// Return `self AS name` alias expression with a specific qualifier and metadata
@@ -1625,7 +1629,9 @@ impl Expr {
16251629
name: impl Into<String>,
16261630
metadata: Option<FieldMetadata>,
16271631
) -> Expr {
1628-
Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1632+
Expr::Alias(Box::new(
1633+
Alias::new(self, relation, name.into()).with_metadata(metadata),
1634+
))
16291635
}
16301636

16311637
/// Remove an alias from an expression if one exists.
@@ -2018,7 +2024,7 @@ impl Expr {
20182024
| Expr::SimilarTo(..)
20192025
| Expr::Not(..)
20202026
| Expr::Negative(..)
2021-
| Expr::OuterReferenceColumn(_, _)
2027+
| Expr::OuterReferenceColumn(_)
20222028
| Expr::TryCast(..)
20232029
| Expr::Unnest(..)
20242030
| Expr::Wildcard { .. }
@@ -2106,23 +2112,10 @@ impl NormalizeEq for Expr {
21062112
&& self_right.normalize_eq(other_right)
21072113
}
21082114
}
2109-
(
2110-
Expr::Alias(Alias {
2111-
expr: self_expr,
2112-
relation: self_relation,
2113-
name: self_name,
2114-
..
2115-
}),
2116-
Expr::Alias(Alias {
2117-
expr: other_expr,
2118-
relation: other_relation,
2119-
name: other_name,
2120-
..
2121-
}),
2122-
) => {
2123-
self_name == other_name
2124-
&& self_relation == other_relation
2125-
&& self_expr.normalize_eq(other_expr)
2115+
(Expr::Alias(boxed_alias), Expr::Alias(boxed_other_alias)) => {
2116+
boxed_alias.name == boxed_other_alias.name
2117+
&& boxed_alias.relation == boxed_other_alias.relation
2118+
&& boxed_alias.expr.normalize_eq(&*boxed_other_alias.expr)
21262119
}
21272120
(
21282121
Expr::Like(Like {
@@ -2453,14 +2446,9 @@ impl HashNode for Expr {
24532446
fn hash_node<H: Hasher>(&self, state: &mut H) {
24542447
mem::discriminant(self).hash(state);
24552448
match self {
2456-
Expr::Alias(Alias {
2457-
expr: _expr,
2458-
relation,
2459-
name,
2460-
..
2461-
}) => {
2462-
relation.hash(state);
2463-
name.hash(state);
2449+
Expr::Alias(boxed_alias) => {
2450+
boxed_alias.relation.hash(state);
2451+
boxed_alias.name.hash(state);
24642452
}
24652453
Expr::Column(column) => {
24662454
column.hash(state);
@@ -2601,7 +2589,8 @@ impl HashNode for Expr {
26012589
Expr::Placeholder(place_holder) => {
26022590
place_holder.hash(state);
26032591
}
2604-
Expr::OuterReferenceColumn(data_type, column) => {
2592+
Expr::OuterReferenceColumn(boxed_orc) => {
2593+
let (data_type, column) = boxed_orc.as_ref();
26052594
data_type.hash(state);
26062595
column.hash(state);
26072596
}
@@ -2665,12 +2654,14 @@ impl Display for SchemaDisplay<'_> {
26652654
}
26662655
}
26672656
// Expr is not shown since it is aliased
2668-
Expr::Alias(Alias {
2669-
name,
2670-
relation: Some(relation),
2671-
..
2672-
}) => write!(f, "{relation}.{name}"),
2673-
Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2657+
Expr::Alias(boxed_alias) => {
2658+
let alias = boxed_alias.as_ref();
2659+
if let Some(relation) = &alias.relation {
2660+
write!(f, "{relation}.{}", alias.name)
2661+
} else {
2662+
write!(f, "{}", alias.name)
2663+
}
2664+
}
26742665
Expr::Between(Between {
26752666
expr,
26762667
negated,
@@ -2909,7 +2900,10 @@ impl Display for SqlDisplay<'_> {
29092900
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
29102901
match self.0 {
29112902
Expr::Literal(scalar, _) => scalar.fmt(f),
2912-
Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2903+
Expr::Alias(boxed_alias) => {
2904+
let name = &boxed_alias.as_ref().name;
2905+
write!(f, "{name}")
2906+
}
29132907
Expr::Between(Between {
29142908
expr,
29152909
negated,
@@ -3169,9 +3163,13 @@ pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
31693163
impl Display for Expr {
31703164
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
31713165
match self {
3172-
Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3166+
Expr::Alias(boxed_alias) => {
3167+
let Alias { expr, name, .. } = boxed_alias.as_ref();
3168+
write!(f, "{expr} AS {name}")
3169+
}
31733170
Expr::Column(c) => write!(f, "{c}"),
3174-
Expr::OuterReferenceColumn(_, c) => {
3171+
Expr::OuterReferenceColumn(boxed_orc) => {
3172+
let (_, c) = boxed_orc.as_ref();
31753173
write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
31763174
}
31773175
Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
@@ -3818,7 +3816,7 @@ mod test {
38183816
// If this test fails when you change `Expr`, please try
38193817
// `Box`ing the fields to make `Expr` smaller
38203818
// See https://github.com/apache/datafusion/issues/16199 for details
3821-
assert_eq!(size_of::<Expr>(), 128);
3819+
assert_eq!(size_of::<Expr>(), 112);
38223820
assert_eq!(size_of::<ScalarValue>(), 64);
38233821
assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
38243822
assert_eq!(size_of::<Vec<Expr>>(), 24);

datafusion/expr/src/expr_fn.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ pub fn col(ident: impl Into<Column>) -> Expr {
6969
/// Create an out reference column which holds a reference that has been resolved to a field
7070
/// outside of the current plan.
7171
pub fn out_ref_col(dt: DataType, ident: impl Into<Column>) -> Expr {
72-
Expr::OuterReferenceColumn(dt, ident.into())
72+
Expr::OuterReferenceColumn(Box::new((dt, ident.into())))
7373
}
7474

7575
/// Create an unqualified column expression from the provided name, without normalizing

datafusion/expr/src/expr_rewriter/mod.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,14 @@ pub fn create_col_from_scalar_expr(
174174
subqry_alias: String,
175175
) -> Result<Column> {
176176
match scalar_expr {
177-
Expr::Alias(Alias { name, .. }) => Ok(Column::new(
178-
Some::<TableReference>(subqry_alias.into()),
179-
name,
180-
)),
177+
Expr::Alias(alias_box) => {
178+
// `alias_box` derefs to `Alias` whether it is bound as `&Box<Alias>` or `Box<Alias>`
179+
let name = &alias_box.name;
180+
Ok(Column::new(
181+
Some::<TableReference>(subqry_alias.into()),
182+
name,
183+
))
184+
}
181185
Expr::Column(col) => Ok(col.with_relation(subqry_alias.into())),
182186
_ => {
183187
let scalar_column = scalar_expr.schema_name().to_string();
@@ -200,7 +204,9 @@ pub fn unnormalize_cols(exprs: impl IntoIterator<Item = Expr>) -> Vec<Expr> {
200204
pub fn strip_outer_reference(expr: Expr) -> Expr {
201205
expr.transform(|expr| {
202206
Ok({
203-
if let Expr::OuterReferenceColumn(_, col) = expr {
207+
// Match the boxed (DataType, Column) tuple and extract the Column
208+
if let Expr::OuterReferenceColumn(boxed_pair) = expr {
209+
let (_data_type, col) = *boxed_pair;
204210
Transformed::yes(Expr::Column(col))
205211
} else {
206212
Transformed::no(expr)
@@ -250,7 +256,9 @@ fn coerce_exprs_for_schema(
250256
let new_type = dst_schema.field(idx).data_type();
251257
if new_type != &expr.get_type(src_schema)? {
252258
match expr {
253-
Expr::Alias(Alias { expr, name, .. }) => {
259+
Expr::Alias(alias_box) => {
260+
// alias_box: Box<Alias>
261+
let Alias { expr, name, .. } = *alias_box;
254262
Ok(expr.cast_to(new_type, src_schema)?.alias(name))
255263
}
256264
#[expect(deprecated)]
@@ -264,12 +272,13 @@ fn coerce_exprs_for_schema(
264272
.collect::<Result<_>>()
265273
}
266274

267-
/// Recursively un-alias an expressions
275+
/// Recursively un-alias an expression
268276
#[inline]
269277
pub fn unalias(expr: Expr) -> Expr {
270278
match expr {
271-
Expr::Alias(Alias { expr, .. }) => unalias(*expr),
272-
_ => expr,
279+
// Unbox the Alias, then recurse on the inner Expr
280+
Expr::Alias(boxed_alias) => unalias(*boxed_alias.expr),
281+
other => other,
273282
}
274283
}
275284

datafusion/expr/src/expr_rewriter/order_by.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,11 @@ fn rewrite_in_terms_of_projection(
137137
/// so avg(c) as average will match avgc
138138
fn expr_match(needle: &Expr, expr: &Expr) -> bool {
139139
// check inside aliases
140-
if let Expr::Alias(Alias { expr, .. }) = &expr {
141-
expr.as_ref() == needle
140+
if let Expr::Alias(alias_box) = expr {
141+
// alias_box: &Box<Alias>, so alias_box.as_ref() is &Alias
142+
let alias: &Alias = alias_box.as_ref();
143+
// alias.expr: Box<Expr>, so alias.expr.as_ref() is &Expr
144+
alias.expr.as_ref() == needle
142145
} else {
143146
expr == needle
144147
}

0 commit comments

Comments
 (0)