Skip to content

Commit da55c5d

Browse files
committed
[task #8987]add_to_date_function
Signed-off-by: tangruilin <[email protected]>
1 parent d6d35f7 commit da55c5d

File tree

11 files changed

+205
-36
lines changed

11 files changed

+205
-36
lines changed

datafusion/common/src/scalar.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use arrow::{
5252
},
5353
};
5454
use arrow_array::cast::as_list_array;
55+
use arrow_array::types::Date32Type;
5556

5657
/// A dynamically typed, nullable single value, (the single-valued counter-part
5758
/// to arrow's [`Array`])
@@ -3239,6 +3240,12 @@ impl ScalarType<i64> for TimestampNanosecondType {
32393240
}
32403241
}
32413242

3243+
impl ScalarType<i32> for Date32Type {
3244+
fn scalar(r: Option<i32>) -> ScalarValue {
3245+
ScalarValue::Date32(r)
3246+
}
3247+
}
3248+
32423249
#[cfg(test)]
32433250
mod tests {
32443251
use std::cmp::Ordering;

datafusion/expr/src/built_in_function.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ pub enum BuiltinScalarFunction {
285285
ToTimestampSeconds,
286286
/// from_unixtime
287287
FromUnixtime,
288+
/// to_date
289+
ToDate,
288290
///now
289291
Now,
290292
///current_date
@@ -487,6 +489,7 @@ impl BuiltinScalarFunction {
487489
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
488490
BuiltinScalarFunction::Struct => Volatility::Immutable,
489491
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
492+
BuiltinScalarFunction::ToDate => Volatility::Immutable,
490493
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
491494
BuiltinScalarFunction::OverLay => Volatility::Immutable,
492495
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
@@ -802,6 +805,7 @@ impl BuiltinScalarFunction {
802805
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
803806
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
804807
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
808+
BuiltinScalarFunction::ToDate => Ok(Date32),
805809
BuiltinScalarFunction::Now => {
806810
Ok(Timestamp(Nanosecond, Some("+00:00".into())))
807811
}
@@ -1050,6 +1054,7 @@ impl BuiltinScalarFunction {
10501054
BuiltinScalarFunction::FromUnixtime => {
10511055
Signature::uniform(1, vec![Int64], self.volatility())
10521056
}
1057+
BuiltinScalarFunction::ToDate => Signature::variadic_any(self.volatility()),
10531058
BuiltinScalarFunction::Digest => Signature::one_of(
10541059
vec![
10551060
Exact(vec![Utf8, Utf8]),
@@ -1494,6 +1499,7 @@ impl BuiltinScalarFunction {
14941499
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
14951500
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
14961501
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
1502+
BuiltinScalarFunction::ToDate => &["to_date"],
14971503

14981504
// hashing functions
14991505
BuiltinScalarFunction::Digest => &["digest"],

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,11 @@ nary_scalar_expr!(
885885
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
886886
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
887887
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
888+
nary_scalar_expr!(
889+
ToDate,
890+
to_date,
891+
"converts a string and optional formats to a `Date32`"
892+
);
888893
nary_scalar_expr!(
889894
ToTimestamp,
890895
to_timestamp,

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,42 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
396396
string_to_timestamp_nanos(s).map_err(|e| e.into())
397397
}
398398

399+
fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
400+
match args.len() {
401+
1 => handle::<Date32Type, _, Date32Type>(
402+
args,
403+
|s| {
404+
string_to_timestamp_nanos_shim(s)
405+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
406+
.and_then(|v| {
407+
v.try_into().map_err(|_| {
408+
DataFusionError::NotImplemented("()".to_string())
409+
})
410+
})
411+
},
412+
name,
413+
),
414+
n if n >= 2 => handle_multiple::<Date32Type, _, Date32Type, _>(
415+
args,
416+
|s, format| {
417+
string_to_timestamp_nanos_formatted(s, format)
418+
.map(|n| {
419+
println!("{n}");
420+
n / (1_000_000 * 24 * 60 * 60 * 1_000)
421+
})
422+
.and_then(|v| {
423+
v.try_into().map_err(|_| {
424+
DataFusionError::NotImplemented("()".to_string())
425+
})
426+
})
427+
},
428+
|n| n,
429+
name,
430+
),
431+
_ => internal_err!("Unsupported 0 argument count for function {name}"),
432+
}
433+
}
434+
399435
fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
400436
args: &[ColumnarValue],
401437
name: &str,
@@ -423,6 +459,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
423459
}
424460
}
425461

462+
/// to_date SQL function
463+
pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
464+
to_date_impl(args, "to_date")
465+
}
466+
426467
/// to_timestamp SQL function
427468
///
428469
/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
@@ -1308,6 +1349,36 @@ fn validate_to_timestamp_data_types(
13081349
None
13091350
}
13101351

1352+
/// to_date SQL function implementation
1353+
pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
1354+
if args.is_empty() {
1355+
return exec_err!(
1356+
"to_date function requires 1 or more arguments, got {}",
1357+
args.len()
1358+
);
1359+
}
1360+
1361+
// validate that any args after the first one are Utf8
1362+
if args.len() > 1 {
1363+
if let Some(value) = validate_to_timestamp_data_types(args, "to_date") {
1364+
return value;
1365+
}
1366+
}
1367+
1368+
match args[0].data_type() {
1369+
DataType::Int32
1370+
| DataType::Int64
1371+
| DataType::Null
1372+
| DataType::Float64
1373+
| DataType::Date32
1374+
| DataType::Date64 => cast_column(&args[0], &DataType::Date32, None),
1375+
DataType::Utf8 => to_date(args),
1376+
other => {
1377+
internal_err!("Unsupported data type {:?} for function to_date", other)
1378+
}
1379+
}
1380+
}
1381+
13111382
/// to_timestamp() SQL function implementation
13121383
pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
13131384
if args.is_empty() {

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ pub fn create_physical_fun(
574574
BuiltinScalarFunction::FromUnixtime => {
575575
Arc::new(datetime_expressions::from_unixtime_invoke)
576576
}
577+
BuiltinScalarFunction::ToDate => Arc::new(datetime_expressions::to_date_invoke),
577578
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
578579
DataType::Utf8 => {
579580
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ enum ScalarFunction {
670670
EndsWith = 131;
671671
InStr = 132;
672672
MakeDate = 133;
673+
ToDate = 134;
673674
}
674675

675676
message ScalarFunctionNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)