|
18 | 18 | use crate::strings::StringArrayType;
|
19 | 19 | use crate::utils::{make_scalar_function, utf8_to_int_type};
|
20 | 20 | use arrow::array::{
|
21 |
| - Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveArray, |
| 21 | + Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder, |
22 | 22 | };
|
23 | 23 | use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
|
24 | 24 | use datafusion_common::Result;
|
@@ -136,31 +136,52 @@ fn character_length(args: &[ArrayRef]) -> Result<ArrayRef> {
|
136 | 136 | }
|
137 | 137 | }
|
138 | 138 |
|
139 |
| -fn character_length_general<'a, T: ArrowPrimitiveType, V: StringArrayType<'a>>( |
140 |
| - array: V, |
141 |
| -) -> Result<ArrayRef> |
| 139 | +fn character_length_general<'a, T, V>(array: V) -> Result<ArrayRef> |
142 | 140 | where
|
| 141 | + T: ArrowPrimitiveType, |
143 | 142 | T::Native: OffsetSizeTrait,
|
| 143 | + V: StringArrayType<'a>, |
144 | 144 | {
|
| 145 | + let mut builder = PrimitiveBuilder::<T>::with_capacity(array.len()); |
| 146 | + |
145 | 147 | // String characters are variable length encoded in UTF-8, counting the
|
146 | 148 | // number of chars requires expensive decoding, however checking if the
|
147 | 149 | // string is ASCII only is relatively cheap.
|
148 | 150 | // If strings are ASCII only, count bytes instead.
|
149 | 151 | let is_array_ascii_only = array.is_ascii();
|
150 |
| - let iter = array.iter(); |
151 |
| - let result = iter |
152 |
| - .map(|string| { |
153 |
| - string.map(|string: &str| { |
154 |
| - if is_array_ascii_only { |
155 |
| - T::Native::usize_as(string.len()) |
156 |
| - } else { |
157 |
| - T::Native::usize_as(string.chars().count()) |
158 |
| - } |
159 |
| - }) |
160 |
| - }) |
161 |
| - .collect::<PrimitiveArray<T>>(); |
162 |
| - |
163 |
| - Ok(Arc::new(result) as ArrayRef) |
| 152 | + if array.null_count() == 0 { |
| 153 | + if is_array_ascii_only { |
| 154 | + for i in 0..array.len() { |
| 155 | + let value = array.value(i); |
| 156 | + builder.append_value(T::Native::usize_as(value.len())); |
| 157 | + } |
| 158 | + } else { |
| 159 | + for i in 0..array.len() { |
| 160 | + let value = array.value(i); |
| 161 | + builder.append_value(T::Native::usize_as(value.chars().count())); |
| 162 | + } |
| 163 | + } |
| 164 | + } else if is_array_ascii_only { |
| 165 | + for i in 0..array.len() { |
| 166 | + if array.is_null(i) { |
| 167 | + builder.append_null(); |
| 168 | + } else { |
| 169 | + let value = array.value(i); |
| 170 | + builder.append_value(T::Native::usize_as(value.len())); |
| 171 | + } |
| 172 | + } |
| 173 | + } else { |
| 174 | + for i in 0..array.len() { |
| 175 | + if array.is_null(i) { |
| 176 | + builder.append_null(); |
| 177 | + } else { |
| 178 | + let value = array.value(i); |
| 179 | + builder.append_value(T::Native::usize_as(value.chars().count())); |
| 180 | + } |
| 181 | + } |
| 182 | + } |
| 183 | + |
| 184 | + Ok(Arc::new(builder.finish()) as ArrayRef) |
164 | 185 | }
|
165 | 186 |
|
166 | 187 | #[cfg(test)]
|
|
0 commit comments