Skip to content

Commit 6d4e2f2

Browse files
authored
Implement min max support for string/binary view types (#6053)
* add
* implement min max support for string/binary view
* update tests
1 parent 31b8ba0 commit 6d4e2f2

File tree

1 file changed

+142
-46
lines changed

1 file changed

+142
-46
lines changed

arrow-arith/src/aggregate.rs

Lines changed: 142 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -415,21 +415,41 @@ pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&
415415
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
416416
}
417417

418+
/// Returns the maximum value in the binary view array, according to the natural order.
///
/// Values are compared as `&[u8]` slices with `<`, i.e. lexicographically byte by byte.
/// Null slots do not take part in the comparison; an array whose slots are all null
/// yields `None` (see `test_binary_min_max_all_nulls`).
419+
pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
420+
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
421+
}
422+
418423
/// Returns the minimum value in the binary array, according to the natural order.
///
/// Generic over the offset size `T`, so it accepts both `BinaryArray` and
/// `LargeBinaryArray` (the `test_binary!` cases call it with each). Values are compared as
/// `&[u8]` slices with `>`, i.e. lexicographically byte by byte. Null slots do not take
/// part in the comparison; an all-null array yields `None`.
419424
pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
420425
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
421426
}
422427

428+
/// Returns the minimum value in the binary view array, according to the natural order.
///
/// Values are compared as `&[u8]` slices with `>`, i.e. lexicographically byte by byte.
/// Null slots do not take part in the comparison; an array whose slots are all null
/// yields `None` (see `test_binary_min_max_all_nulls`).
429+
pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
430+
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
431+
}
432+
423433
/// Returns the maximum value in the string array, according to the natural order.
///
/// Generic over the offset size `T`, so it accepts both `StringArray` and
/// `LargeStringArray` (the `test_string!` cases call it with each). Values are compared as
/// `&str` with `<`, which orders strings lexicographically by their bytes. Null slots do
/// not take part in the comparison; an all-null or empty array yields `None`.
424434
pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
425435
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
426436
}
427437

438+
/// Returns the maximum value in the string view array, according to the natural order.
///
/// Values are compared as `&str` with `<`, which orders strings lexicographically by their
/// bytes. Null slots do not take part in the comparison; an all-null or empty array yields
/// `None` (see `test_string_min_max_all_nulls` and `test_string_min_max_empty`).
439+
pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
440+
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
441+
}
442+
428443
/// Returns the minimum value in the string array, according to the natural order.
///
/// Generic over the offset size `T`, so it accepts both `StringArray` and
/// `LargeStringArray` (the `test_string!` cases call it with each). Values are compared as
/// `&str` with `>`, which orders strings lexicographically by their bytes. Null slots do
/// not take part in the comparison; an all-null or empty array yields `None`.
429444
pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
430445
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
431446
}
432447

448+
/// Returns the minimum value in the string view array, according to the natural order.
///
/// Values are compared as `&str` with `>`, which orders strings lexicographically by their
/// bytes. Null slots do not take part in the comparison; an all-null or empty array yields
/// `None` (see `test_string_min_max_all_nulls` and `test_string_min_max_empty`).
449+
pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
450+
// NOTE(review): the predicate presumably returns `true` when `b` should replace the
// running result `a` — confirm against `min_max_helper`, which is defined outside this view.
min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
451+
}
452+
433453
/// Returns the sum of values in the array.
434454
///
435455
/// This doesn't detect overflow. Once overflowing, the result will wrap around.
@@ -1132,61 +1152,137 @@ mod tests {
11321152
assert!(max(&a).unwrap().is_nan());
11331153
}
11341154

1135-
#[test]
1136-
fn test_binary_min_max_with_nulls() {
1137-
let a = BinaryArray::from(vec![
1138-
Some("b".as_bytes()),
1155+
// Generates a `#[test]` function named `$NAME` that runs the same min/max expectations
// against all three binary layouts: `BinaryArray`, `LargeBinaryArray` and `BinaryViewArray`.
//
// `$ARRAY` is expanded (and therefore evaluated) three times, once per `From` conversion,
// so it should be a cheap, side-effect-free expression such as a `vec![...]` literal.
// `$EXPECTED_MIN` / `$EXPECTED_MAX` are `Option<&[u8]>` values compared with `assert_eq!`.
macro_rules! test_binary {
1156+
($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1157+
#[test]
1158+
fn $NAME() {
1159+
// 32-bit-offset layout, via the generic `min_binary` / `max_binary`.
let binary = BinaryArray::from($ARRAY);
1160+
assert_eq!($EXPECTED_MIN, min_binary(&binary));
1161+
assert_eq!($EXPECTED_MAX, max_binary(&binary));
1162+
1163+
// Large-offset layout, same generic functions.
let large_binary = LargeBinaryArray::from($ARRAY);
1164+
assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
1165+
assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
1166+
1167+
// View layout, via the dedicated `*_binary_view` functions.
let binary_view = BinaryViewArray::from($ARRAY);
1168+
assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
1169+
assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
1170+
}
1171+
};
1172+
}
1173+
1174+
test_binary!(
1175+
test_binary_min_max_with_nulls,
1176+
vec![
1177+
Some("b01234567890123".as_bytes()), // long bytes
11391178
None,
11401179
None,
11411180
Some(b"a"),
11421181
Some(b"c"),
1143-
]);
1144-
assert_eq!(Some("a".as_bytes()), min_binary(&a));
1145-
assert_eq!(Some("c".as_bytes()), max_binary(&a));
1146-
}
1147-
1148-
#[test]
1149-
fn test_binary_min_max_no_null() {
1150-
let a = BinaryArray::from(vec![Some("b".as_bytes()), Some(b"a"), Some(b"c")]);
1151-
assert_eq!(Some("a".as_bytes()), min_binary(&a));
1152-
assert_eq!(Some("c".as_bytes()), max_binary(&a));
1153-
}
1182+
Some(b"abcdedfg0123456"),
1183+
],
1184+
Some("a".as_bytes()),
1185+
Some("c".as_bytes())
1186+
);
1187+
1188+
// No nulls: the expected minimum is itself a long (20-byte) value and the expected maximum
// is the single byte "c", so a long and a short entry each end up as a result.
// NOTE(review): the "long bytes" entries are presumably sized to exceed the view types'
// inline-length threshold — confirm against the Arrow view layout.
test_binary!(
1189+
test_binary_min_max_no_null,
1190+
vec![
1191+
Some("b".as_bytes()),
1192+
Some(b"abcdefghijklmnopqrst"), // long bytes
1193+
Some(b"c"),
1194+
Some(b"b01234567890123"), // long bytes for view types
1195+
],
1196+
Some("abcdefghijklmnopqrst".as_bytes()),
1197+
Some("c".as_bytes())
1198+
);
11541199

1155-
#[test]
1156-
fn test_binary_min_max_all_nulls() {
1157-
let a = BinaryArray::from(vec![None, None]);
1158-
assert_eq!(None, min_binary(&a));
1159-
assert_eq!(None, max_binary(&a));
1160-
}
1200+
// All slots null: both min and max are expected to be `None` for every binary layout.
test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
11611201

1162-
#[test]
1163-
fn test_binary_min_max_1() {
1164-
let a = BinaryArray::from(vec![None, None, Some("b".as_bytes()), Some(b"a")]);
1165-
assert_eq!(Some("a".as_bytes()), min_binary(&a));
1166-
assert_eq!(Some("b".as_bytes()), max_binary(&a));
1167-
}
1168-
1169-
#[test]
1170-
fn test_string_min_max_with_nulls() {
1171-
let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]);
1172-
assert_eq!(Some("a"), min_string(&a));
1173-
assert_eq!(Some("c"), max_string(&a));
1202+
// Nulls interleaved with values. The two `b0123...` entries share a five-byte prefix, so the
// maximum is decided at the sixth byte (`x` sorts after `4`); the minimum is the short "a".
test_binary!(
1203+
test_binary_min_max_1,
1204+
vec![
1205+
None,
1206+
Some("b01234567890123435".as_bytes()), // long bytes for view types
1207+
None,
1208+
Some(b"b0123xxxxxxxxxxx"),
1209+
Some(b"a")
1210+
],
1211+
Some("a".as_bytes()),
1212+
Some("b0123xxxxxxxxxxx".as_bytes())
1213+
);
1214+
1215+
// Generates a `#[test]` function named `$NAME` that runs the same min/max expectations
// against all three string layouts: `StringArray`, `LargeStringArray` and `StringViewArray`.
//
// `$ARRAY` is expanded (and therefore evaluated) three times, once per `From` conversion,
// so it should be a cheap, side-effect-free expression such as a `vec![...]` literal.
// `$EXPECTED_MIN` / `$EXPECTED_MAX` are `Option<&str>` values compared with `assert_eq!`.
macro_rules! test_string {
1216+
($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1217+
#[test]
1218+
fn $NAME() {
1219+
// 32-bit-offset layout, via the generic `min_string` / `max_string`.
let string = StringArray::from($ARRAY);
1220+
assert_eq!($EXPECTED_MIN, min_string(&string));
1221+
assert_eq!($EXPECTED_MAX, max_string(&string));
1222+
1223+
// Large-offset layout, same generic functions.
let large_string = LargeStringArray::from($ARRAY);
1224+
assert_eq!($EXPECTED_MIN, min_string(&large_string));
1225+
assert_eq!($EXPECTED_MAX, max_string(&large_string));
1226+
1227+
// View layout, via the dedicated `*_string_view` functions.
let string_view = StringViewArray::from($ARRAY);
1228+
assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
1229+
assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
1230+
}
1231+
};
11741232
}
11751233

1176-
#[test]
1177-
fn test_string_min_max_all_nulls() {
1178-
let v: Vec<Option<&str>> = vec![None, None];
1179-
let a = StringArray::from(v);
1180-
assert_eq!(None, min_string(&a));
1181-
assert_eq!(None, max_string(&a));
1182-
}
1234+
// Nulls mixed with values: the short entries win at both ends ("a" and "c"), while the two
// long `b...` entries sort strictly between them and must not be picked.
// NOTE(review): the long entries are presumably sized to exceed the view types'
// inline-length threshold — confirm against the Arrow view layout.
test_string!(
1235+
test_string_min_max_with_nulls,
1236+
vec![
1237+
Some("b012345678901234"), // long bytes for view types
1238+
None,
1239+
None,
1240+
Some("a"),
1241+
Some("c"),
1242+
Some("b0123xxxxxxxxxxx")
1243+
],
1244+
Some("a"),
1245+
Some("c")
1246+
);
1247+
1248+
// No nulls: the maximum is decided between two long values sharing the prefix `b012`
// (`x` sorts after `3` at the fifth byte); the bare "b" is a prefix of both and sorts
// before them. The minimum is the short "a".
test_string!(
1249+
test_string_min_max_no_null,
1250+
vec![
1251+
Some("b"),
1252+
Some("b012345678901234"), // long bytes for view types
1253+
Some("a"),
1254+
Some("b012xxxxxxxxxxxx")
1255+
],
1256+
Some("a"),
1257+
Some("b012xxxxxxxxxxxx")
1258+
);
1259+
1260+
// All slots null: both min and max are expected to be `None` for every string layout.
// NOTE(review): the element type is spelled out explicitly, presumably because bare `None`
// values give the `From` conversions inside the macro nothing to infer it from — confirm.
test_string!(
1261+
test_string_min_max_all_nulls,
1262+
Vec::<Option<&str>>::from_iter([None, None]),
1263+
None,
1264+
None
1265+
);
11831266

1184-
#[test]
1185-
fn test_string_min_max_1() {
1186-
let a = StringArray::from(vec![None, None, Some("b"), Some("a")]);
1187-
assert_eq!(Some("a"), min_string(&a));
1188-
assert_eq!(Some("b"), max_string(&a));
1189-
}
1267+
// Nulls interleaved with values. The two `c1234...` entries share a five-byte prefix, so the
// maximum is decided at the sixth byte (`x` sorts after `5`); the minimum is the short "b".
test_string!(
1268+
test_string_min_max_1,
1269+
vec![
1270+
None,
1271+
Some("c12345678901234"), // long bytes for view types
1272+
None,
1273+
Some("b"),
1274+
Some("c1234xxxxxxxxxx")
1275+
],
1276+
Some("b"),
1277+
Some("c1234xxxxxxxxxx")
1278+
);
1279+
1280+
// Zero-length input: both min and max are expected to be `None` for every string layout.
test_string!(
1281+
test_string_min_max_empty,
1282+
Vec::<Option<&str>>::new(),
1283+
None,
1284+
None
1285+
);
11901286

11911287
#[test]
11921288
fn test_boolean_min_max_empty() {

0 commit comments

Comments
 (0)