Skip to content

Commit f8c0efe

Browse files
tlm365 authored and zhuliquan committed
Optimize performance of character_length function (apache#13696)
* Optimize performance of the character_length function

  Signed-off-by: Tai Le Manh <[email protected]>

* Add a pre-check for whether the array contains nulls

* Fix clippy warnings

---------

Signed-off-by: Tai Le Manh <[email protected]>
1 parent d8c9cfb commit f8c0efe

File tree

1 file changed

+39
-18
lines changed

1 file changed

+39
-18
lines changed

datafusion/functions/src/unicode/character_length.rs

Lines changed: 39 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
use crate::strings::StringArrayType;
1919
use crate::utils::{make_scalar_function, utf8_to_int_type};
2020
use arrow::array::{
21-
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveArray,
21+
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder,
2222
};
2323
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
2424
use datafusion_common::Result;
@@ -136,31 +136,52 @@ fn character_length(args: &[ArrayRef]) -> Result<ArrayRef> {
136136
}
137137
}
138138

139-
fn character_length_general<'a, T: ArrowPrimitiveType, V: StringArrayType<'a>>(
140-
array: V,
141-
) -> Result<ArrayRef>
139+
fn character_length_general<'a, T, V>(array: V) -> Result<ArrayRef>
142140
where
141+
T: ArrowPrimitiveType,
143142
T::Native: OffsetSizeTrait,
143+
V: StringArrayType<'a>,
144144
{
145+
let mut builder = PrimitiveBuilder::<T>::with_capacity(array.len());
146+
145147
// String characters are variable length encoded in UTF-8, counting the
146148
// number of chars requires expensive decoding, however checking if the
147149
// string is ASCII only is relatively cheap.
148150
// If strings are ASCII only, count bytes instead.
149151
let is_array_ascii_only = array.is_ascii();
150-
let iter = array.iter();
151-
let result = iter
152-
.map(|string| {
153-
string.map(|string: &str| {
154-
if is_array_ascii_only {
155-
T::Native::usize_as(string.len())
156-
} else {
157-
T::Native::usize_as(string.chars().count())
158-
}
159-
})
160-
})
161-
.collect::<PrimitiveArray<T>>();
162-
163-
Ok(Arc::new(result) as ArrayRef)
152+
if array.null_count() == 0 {
153+
if is_array_ascii_only {
154+
for i in 0..array.len() {
155+
let value = array.value(i);
156+
builder.append_value(T::Native::usize_as(value.len()));
157+
}
158+
} else {
159+
for i in 0..array.len() {
160+
let value = array.value(i);
161+
builder.append_value(T::Native::usize_as(value.chars().count()));
162+
}
163+
}
164+
} else if is_array_ascii_only {
165+
for i in 0..array.len() {
166+
if array.is_null(i) {
167+
builder.append_null();
168+
} else {
169+
let value = array.value(i);
170+
builder.append_value(T::Native::usize_as(value.len()));
171+
}
172+
}
173+
} else {
174+
for i in 0..array.len() {
175+
if array.is_null(i) {
176+
builder.append_null();
177+
} else {
178+
let value = array.value(i);
179+
builder.append_value(T::Native::usize_as(value.chars().count()));
180+
}
181+
}
182+
}
183+
184+
Ok(Arc::new(builder.finish()) as ArrayRef)
164185
}
165186

166187
#[cfg(test)]

0 commit comments

Comments
 (0)