Skip to content
6 changes: 6 additions & 0 deletions arrow-array/benches/view_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ fn criterion_benchmark(c: &mut Criterion) {
black_box(array.slice(0, 100_000 / 2));
});
});

c.bench_function("view types slice", |b| {
b.iter(|| {
black_box(array.slice(0, 100_000 / 2));
});
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
40 changes: 26 additions & 14 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ use crate::builder::{ArrayBuilder, GenericByteViewBuilder};
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{Array, ArrayAccessor, ArrayRef, GenericByteArray, OffsetSizeTrait, Scalar};
use crate::{
Array, ArrayAccessor, ArrayRef, GenericByteArray, OffsetSizeTrait, Scalar, ViewBuffers,
};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
use arrow_schema::{ArrowError, DataType};
Expand Down Expand Up @@ -164,7 +166,7 @@ use super::ByteArrayType;
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
data_type: DataType,
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: ViewBuffers,
phantom: PhantomData<T>,
nulls: Option<NullBuffer>,
}
Expand All @@ -187,7 +189,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// # Panics
///
/// Panics if [`GenericByteViewArray::try_new`] returns an error
pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
pub fn new(
views: ScalarBuffer<u128>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Self {
Self::try_new(views, buffers, nulls).unwrap()
}

Expand All @@ -199,9 +205,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// * [ByteViewType::validate] fails
pub fn try_new(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
let buffers = buffers.into();

T::validate(&views, &buffers)?;

if let Some(n) = nulls.as_ref() {
Expand Down Expand Up @@ -231,7 +239,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Safe if [`Self::try_new`] would not error
pub unsafe fn new_unchecked(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
buffers: impl Into<ViewBuffers>,
nulls: Option<NullBuffer>,
) -> Self {
if cfg!(feature = "force_validate") {
Expand All @@ -242,7 +250,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
data_type: T::DATA_TYPE,
phantom: Default::default(),
views,
buffers,
buffers: buffers.into(),
nulls,
}
}
Expand All @@ -252,7 +260,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
Self {
data_type: T::DATA_TYPE,
views: vec![0; len].into(),
buffers: vec![],
buffers: vec![].into(),
nulls: Some(NullBuffer::new_null(len)),
phantom: Default::default(),
}
Expand All @@ -278,7 +286,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
}

/// Deconstruct this array into its constituent parts
pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
pub fn into_parts(self) -> (ScalarBuffer<u128>, ViewBuffers, Option<NullBuffer>) {
(self.views, self.buffers, self.nulls)
}

Expand Down Expand Up @@ -609,8 +617,10 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {

fn shrink_to_fit(&mut self) {
self.views.shrink_to_fit();
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
self.buffers.shrink_to_fit();
self.buffers
.make_mut()
.iter_mut()
.for_each(|b| b.shrink_to_fit());
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
Expand Down Expand Up @@ -672,7 +682,7 @@ impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
Self {
data_type: T::DATA_TYPE,
views,
buffers,
buffers: buffers.into(),
nulls: value.nulls().cloned(),
phantom: Default::default(),
}
Expand Down Expand Up @@ -736,12 +746,14 @@ where
}

impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
fn from(mut array: GenericByteViewArray<T>) -> Self {
fn from(array: GenericByteViewArray<T>) -> Self {
let len = array.len();
array.buffers.insert(0, array.views.into_inner());
let mut buffers = array.buffers.unwrap_or_clone();
buffers.insert(0, array.views.into_inner());

let builder = ArrayDataBuilder::new(T::DATA_TYPE)
.len(len)
.buffers(array.buffers)
.buffers(buffers)
.nulls(array.nulls);

unsafe { builder.build_unchecked() }
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ pub mod temporal_conversions;
pub mod timezone;
mod trusted_len;
pub mod types;
mod view_buffers;
pub use view_buffers::ViewBuffers;

#[cfg(test)]
mod tests {
Expand Down
61 changes: 61 additions & 0 deletions arrow-array/src/view_buffers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::{ops::Deref, sync::Arc};

use arrow_buffer::Buffer;

/// A cheaply cloneable, owned slice of [`Buffer`]
///
/// Similar to `Arc<Vec<Buffer>>` or `Arc<[Buffer]>`: the buffers are stored behind an
/// [`Arc`], so cloning a `ViewBuffers` clones the `Arc` handle rather than the `Vec`
/// it points to.
#[derive(Clone, Debug)]
pub struct ViewBuffers(Arc<Vec<Buffer>>);

impl ViewBuffers {
/// Return a mutable reference to the underlying buffers, copying the buffers if necessary.
///
/// Delegates to [`Arc::make_mut`] on the inner `Arc<Vec<Buffer>>`.
pub fn make_mut(&mut self) -> &mut Vec<Buffer> {
// If the underlying Arc is unique, we can mutate it in place; if it is shared,
// `Arc::make_mut` clones the `Vec` into a fresh allocation before handing it out.
Arc::make_mut(&mut self.0)
}

/// Converts this `ViewBuffers` into a `Vec<Buffer>`, cloning the underlying buffers if
/// they are shared.
///
/// Delegates to [`Arc::unwrap_or_clone`] on the inner `Arc<Vec<Buffer>>`.
pub fn unwrap_or_clone(self) -> Vec<Buffer> {
Arc::unwrap_or_clone(self.0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah this is nice.

}
}

impl FromIterator<Buffer> for ViewBuffers {
fn from_iter<T: IntoIterator<Item = Buffer>>(iter: T) -> Self {
let v: Vec<_> = iter.into_iter().collect();
Self(v.into())
}
}

impl From<Vec<Buffer>> for ViewBuffers {
fn from(value: Vec<Buffer>) -> Self {
Self(value.into())
}
}

impl Deref for ViewBuffers {
    type Target = [Buffer];

    /// Borrows the wrapped buffers as a slice, so slice methods can be called
    /// directly on a `ViewBuffers`.
    fn deref(&self) -> &Self::Target {
        self.0.as_slice()
    }
}
Loading