Skip to content

Commit e5dcc8c

Browse files
authored
fix: lazy evaluation for coalesce (#17357)
* fix: lazy evaluation for coalesce * update test
1 parent 241b669 commit e5dcc8c

File tree

2 files changed

+42
-62
lines changed

2 files changed

+42
-62
lines changed

datafusion/functions/src/core/coalesce.rs

Lines changed: 31 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{new_null_array, BooleanArray};
19-
use arrow::compute::kernels::zip::zip;
20-
use arrow::compute::{and, is_not_null, is_null};
2118
use arrow::datatypes::{DataType, Field, FieldRef};
22-
use datafusion_common::{exec_err, internal_err, Result};
19+
use datafusion_common::{exec_err, internal_err, plan_err, Result};
2320
use datafusion_expr::binary::try_type_union_resolution;
21+
use datafusion_expr::conditional_expressions::CaseBuilder;
22+
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2423
use datafusion_expr::{
25-
ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs,
24+
ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs,
2625
};
2726
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
2827
use datafusion_macros::user_doc;
@@ -95,61 +94,36 @@ impl ScalarUDFImpl for CoalesceFunc {
9594
Ok(Field::new(self.name(), return_type, nullable).into())
9695
}
9796

98-
/// coalesce evaluates to the first value which is not NULL
99-
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
100-
let args = args.args;
101-
// do not accept 0 arguments.
97+
fn simplify(
98+
&self,
99+
args: Vec<Expr>,
100+
_info: &dyn SimplifyInfo,
101+
) -> Result<ExprSimplifyResult> {
102102
if args.is_empty() {
103-
return exec_err!(
104-
"coalesce was called with {} arguments. It requires at least 1.",
105-
args.len()
106-
);
103+
return plan_err!("coalesce must have at least one argument");
107104
}
108-
109-
let return_type = args[0].data_type();
110-
let mut return_array = args.iter().filter_map(|x| match x {
111-
ColumnarValue::Array(array) => Some(array.len()),
112-
_ => None,
113-
});
114-
115-
if let Some(size) = return_array.next() {
116-
// start with nulls as default output
117-
let mut current_value = new_null_array(&return_type, size);
118-
let mut remainder = BooleanArray::from(vec![true; size]);
119-
120-
for arg in args {
121-
match arg {
122-
ColumnarValue::Array(ref array) => {
123-
let to_apply = and(&remainder, &is_not_null(array.as_ref())?)?;
124-
current_value = zip(&to_apply, array, &current_value)?;
125-
remainder = and(&remainder, &is_null(array)?)?;
126-
}
127-
ColumnarValue::Scalar(value) => {
128-
if value.is_null() {
129-
continue;
130-
} else {
131-
let last_value = value.to_scalar()?;
132-
current_value = zip(&remainder, &last_value, &current_value)?;
133-
break;
134-
}
135-
}
136-
}
137-
if remainder.iter().all(|x| x == Some(false)) {
138-
break;
139-
}
140-
}
141-
Ok(ColumnarValue::Array(current_value))
142-
} else {
143-
let result = args
144-
.iter()
145-
.filter_map(|x| match x {
146-
ColumnarValue::Scalar(s) if !s.is_null() => Some(x.clone()),
147-
_ => None,
148-
})
149-
.next()
150-
.unwrap_or_else(|| args[0].clone());
151-
Ok(result)
105+
if args.len() == 1 {
106+
return Ok(ExprSimplifyResult::Simplified(
107+
args.into_iter().next().unwrap(),
108+
));
152109
}
110+
111+
let n = args.len();
112+
let (init, last_elem) = args.split_at(n - 1);
113+
let whens = init
114+
.iter()
115+
.map(|x| x.clone().is_not_null())
116+
.collect::<Vec<_>>();
117+
let cases = init.to_vec();
118+
Ok(ExprSimplifyResult::Simplified(
119+
CaseBuilder::new(None, whens, cases, Some(Box::new(last_elem[0].clone())))
120+
.end()?,
121+
))
122+
}
123+
124+
/// coalesce evaluates to the first value which is not NULL
125+
fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
126+
internal_err!("coalesce should have been simplified to case")
153127
}
154128

155129
fn short_circuits(&self) -> bool {

datafusion/sqllogictest/test_files/select.slt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,10 +1656,10 @@ query TT
16561656
explain select coalesce(1, y/x), coalesce(2, y/x) from t;
16571657
----
16581658
logical_plan
1659-
01)Projection: coalesce(Int64(1), CAST(t.y / t.x AS Int64)), coalesce(Int64(2), CAST(t.y / t.x AS Int64))
1659+
01)Projection: CASE WHEN Boolean(true) THEN Int64(1) ELSE CAST(t.y / t.x AS Int64) END AS coalesce(Int64(1),t.y / t.x), CASE WHEN Boolean(true) THEN Int64(2) ELSE CAST(t.y / t.x AS Int64) END AS coalesce(Int64(2),t.y / t.x)
16601660
02)--TableScan: t projection=[x, y]
16611661
physical_plan
1662-
01)ProjectionExec: expr=[coalesce(1, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(1),t.y / t.x), coalesce(2, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(2),t.y / t.x)]
1662+
01)ProjectionExec: expr=[CASE WHEN true THEN 1 ELSE CAST(y@1 / x@0 AS Int64) END as coalesce(Int64(1),t.y / t.x), CASE WHEN true THEN 2 ELSE CAST(y@1 / x@0 AS Int64) END as coalesce(Int64(2),t.y / t.x)]
16631663
02)--DataSourceExec: partitions=1, partition_sizes=[1]
16641664

16651665
query TT
@@ -1686,11 +1686,17 @@ physical_plan
16861686
02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y]
16871687
03)----DataSourceExec: partitions=1, partition_sizes=[1]
16881688

1689-
# due to the reason describe in https://github.com/apache/datafusion/issues/8927,
1690-
# the following queries will fail
1691-
query error
1689+
query II
16921690
select coalesce(1, y/x), coalesce(2, y/x) from t;
1691+
----
1692+
1 2
1693+
1 2
1694+
1 2
1695+
1 2
1696+
1 2
16931697

1698+
# due to the reason describe in https://github.com/apache/datafusion/issues/8927,
1699+
# the following queries will fail
16941700
query error
16951701
SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;
16961702

0 commit comments

Comments
 (0)