100 changes: 60 additions & 40 deletions arrow-ord/src/sort.rs
@@ -311,51 +311,71 @@ fn sort_byte_view<T: ByteViewType>(
     limit: Option<usize>,
 ) -> UInt32Array {
-    // 1. Build a list of (index, raw_view, length)
-    let mut valids: Vec<_> = value_indices
-        .into_iter()
-        .map(|idx| {
-            // SAFETY: we know idx < values.len()
-            let raw = unsafe { *values.views().get_unchecked(idx as usize) };
-            let len = raw as u32; // lower 32 bits encode length
-            (idx, raw, len)
-        })
-        .collect();
-
+    let mut valids: Vec<_>;
     // 2. Compute the number of non-null entries to partially sort
-    let vlimit = match (limit, options.nulls_first) {
-        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
-        _ => valids.len(),
+    let vlimit: usize = match (limit, options.nulls_first) {
+        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
+        _ => value_indices.len(),
     };
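A quick worked example of the vlimit computation above, with hypothetical numbers: when nulls are emitted first they consume output slots, so only the remainder of the limit needs fully ordered non-null entries.

fn main() {
    // Hypothetical sizes, mirroring the match above.
    let limit: Option<usize> = Some(5);
    let nulls_len = 3_usize; // null indices placed before non-nulls
    let num_valids = 10_usize; // non-null indices available
    let nulls_first = true;
    let vlimit = match (limit, nulls_first) {
        (Some(l), true) => l.saturating_sub(nulls_len).min(num_valids),
        _ => num_valids,
    };
    assert_eq!(vlimit, 2); // only 2 non-null entries need full ordering
}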
+    // 3.a Check if all views are inline (no data buffers)
+    if values.data_buffers().is_empty() {
Review comment (Contributor): Thank you @Dandandan, it's very clear that we optimize the no-data-buffers path!
+        valids = value_indices
+            .into_iter()
+            .map(|idx| {
+                // SAFETY: we know idx < values.len()
+                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
+                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
+                (idx, inline_key)
+            })
+            .collect();
+        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);

-    // 3. Mixed comparator: first prefix, then inline vs full comparison
-    let cmp_mixed = |a: &(u32, u128, u32), b: &(u32, u128, u32)| {
-        let (_, raw_a, len_a) = *a;
-        let (_, raw_b, len_b) = *b;
-
-        // 3.1 Both inline (≤12 bytes): compare full 128-bit key including length
-        if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
-            return GenericByteViewArray::<T>::inline_key_fast(raw_a)
-                .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
-        }
+        // Partially sort according to ascending/descending
+        if !options.descending {
+            sort_unstable_by(&mut valids, vlimit, cmp_inline);
+        } else {
+            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
+        }
+    } else {
+        valids = value_indices
+            .into_iter()
+            .map(|idx| {
+                // SAFETY: we know idx < values.len()
+                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
+                (idx, raw)
+            })
+            .collect();
+        // 3.b Mixed comparator: first prefix, then inline vs full comparison
+        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
+            let (_, raw_a) = *a;
+            let (_, raw_b) = *b;
+            let len_a = raw_a as u32;
+            let len_b = raw_b as u32;
+            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
+            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
+                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
+                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
+            }

-        // 3.2 Compare 4-byte prefix in big-endian order
-        let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
-        let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
-        if pref_a != pref_b {
-            return pref_a.cmp(&pref_b);
-        }
+            // 3.b.2 Compare 4-byte prefix in big-endian order
+            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
+            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
+            if pref_a != pref_b {
+                return pref_a.cmp(&pref_b);
+            }

-        // 3.3 Fallback to full byte-slice comparison
-        let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
-        let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
-        full_a.cmp(full_b)
-    };
+            // 3.b.3 Fallback to full byte-slice comparison
+            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
+            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
+            full_a.cmp(full_b)
+        };
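An aside on the prefix trick in 3.b.2: the 4-byte prefix is held as a little-endian u32, so comparing the raw integers would effectively order by the last byte first; swap_bytes turns it big-endian so a single integer comparison agrees with byte-wise lexicographic order. A self-contained illustration (not library code):

fn main() {
    // Lexicographically, "abcd" < "abda".
    let a = u32::from_le_bytes(*b"abcd");
    let b = u32::from_le_bytes(*b"abda");
    // Little-endian integers put the *last* byte in the most
    // significant position, giving the wrong order:
    assert!(a > b);
    // Big-endian (swapped) integers match the byte-slice order:
    assert!(a.swap_bytes() < b.swap_bytes());
    assert_eq!(b"abcd".cmp(b"abda"), std::cmp::Ordering::Less);
}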

-    // 4. Partially sort according to ascending/descending
-    if !options.descending {
-        sort_unstable_by(&mut valids, vlimit, cmp_mixed);
-    } else {
-        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
-    }
+        // 3.b.4 Partially sort according to ascending/descending
+        if !options.descending {
+            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
+        } else {
+            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
+        }
+    }

     // 5. Assemble nulls and sorted indices into final output
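The limit-aware sort_unstable_by used above avoids fully ordering the tail when only vlimit results are needed. A sketch of that pattern under the same signature (illustrative; the actual helper is defined elsewhere in arrow-ord/src/sort.rs):

use std::cmp::Ordering;

// Partition the smallest `limit` elements to the front, then sort
// only that front; the tail is left in arbitrary order.
fn sort_unstable_by_sketch<T, F>(array: &mut [T], limit: usize, mut cmp: F)
where
    F: FnMut(&T, &T) -> Ordering,
{
    if limit < array.len() {
        array.select_nth_unstable_by(limit, &mut cmp);
        array[..limit].sort_unstable_by(&mut cmp);
    } else {
        array.sort_unstable_by(&mut cmp);
    }
}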
@@ -367,10 +387,10 @@ fn sort_byte_view<T: ByteViewType>(
         // Place null indices first
         out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
         let rem = out_limit - out.len();
-        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
+        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
     } else {
         // Place non-null indices first
-        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
+        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
         let rem = out_limit - out.len();
         out.extend_from_slice(&nulls[..rem]);
     }
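End-to-end, both paths are reached through the public sort kernel. A hedged usage sketch (array contents and the expected indices are illustrative, chosen so every value is at most 12 bytes and the inline fast path should apply):

use arrow_array::StringViewArray;
use arrow_ord::sort::sort_to_indices;
use arrow_schema::SortOptions;

fn main() {
    let values: StringViewArray =
        vec![Some("banana"), None, Some("apple"), Some("cherry")]
            .into_iter()
            .collect();
    let options = SortOptions {
        descending: false,
        nulls_first: true,
    };
    // All-inline views typically carry no data buffers, so this
    // should exercise the cmp_inline path above.
    let indices = sort_to_indices(&values, Some(options), None).unwrap();
    // Null first, then ascending by value: [1, 2, 0, 3]
    assert_eq!(indices.iter().flatten().collect::<Vec<_>>(), vec![1, 2, 0, 3]);
}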