Skip to content

Commit 06bcf8d

Browse files
committed
Unsafe improvements: arrow/{array_reader,buffer}.
1 parent 8355823 commit 06bcf8d

8 files changed

+25
-2
lines changed

parquet/src/arrow/array_reader/fixed_len_byte_array.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ impl ArrayReader for FixedLenByteArrayReader {
160160
.add_buffer(Buffer::from_vec(record_data.buffer))
161161
.null_bit_buffer(self.record_reader.consume_bitmap_buffer());
162162

163+
// SAFETY: Assuming that a RecordReader produces a valid FixedLenByteArrayBuffer (as
164+
// otherwise that code itself would be unsound), this call is safe if self.byte_length
165+
// matches the byte length of the FixedLenByteArrayBuffer produced by the RecordReader
166+
// matches the byte length passed as a type parameter to ArrayDataBuilder.
167+
// FIXME: how do we know that the byte lengths match? What if record_data.byte_length
168+
// is None?
163169
let binary = FixedSizeBinaryArray::from(unsafe { array_data.build_unchecked() });
164170

165171
// TODO: An improvement might be to do this conversion on read

parquet/src/arrow/array_reader/fixed_size_list_array.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ impl ArrayReader for FixedSizeListArrayReader {
202202
list_builder = list_builder.null_bit_buffer(Some(builder.into()));
203203
}
204204

205+
// SAFETY: FIXME: document why this call is safe.
205206
let list_data = unsafe { list_builder.build_unchecked() };
206207

207208
let result_array = FixedSizeListArray::from(list_data);

parquet/src/arrow/array_reader/list_array.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
225225
data_builder = data_builder.null_bit_buffer(Some(builder.into()))
226226
}
227227

228+
// SAFETY: FIXME: document why this call is safe.
228229
let list_data = unsafe { data_builder.build_unchecked() };
229230

230231
let result_array = GenericListArray::<OffsetSize>::from(list_data);

parquet/src/arrow/array_reader/map_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ impl ArrayReader for MapArrayReader {
9999
let data = array.to_data();
100100
let builder = data.into_builder().data_type(self.data_type.clone());
101101

102-
// SAFETY - we can assume that ListArrayReader produces valid ListArray
102+
// SAFETY: we can assume that ListArrayReader produces valid ListArray
103103
// of the expected type, and as such its output can be reinterpreted as
104104
// a MapArray without validation
105105
Ok(Arc::new(MapArray::from(unsafe {

parquet/src/arrow/array_reader/primitive_array.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,11 @@ where
179179
.add_buffer(record_data)
180180
.null_bit_buffer(self.record_reader.consume_bitmap_buffer());
181181

182+
// SAFETY: Assuming that the RecordReader's null buffer is sufficiently sized for the
183+
// corresponding Vec<T> buffer (or None), this is safe, as we have a single buffer
184+
// obtained from a vector of `T`s, which must therefore have a valid memory representation
185+
// for T. The type mapping above ensures that a valid T results in a valid Arrow array of
186+
// type `arrow_data_type`.
182187
let array_data = unsafe { array_data.build_unchecked() };
183188
let array: ArrayRef = match T::get_physical_type() {
184189
PhysicalType::BOOLEAN => Arc::new(BooleanArray::from(array_data)),

parquet/src/arrow/array_reader/struct_array.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ impl ArrayReader for StructArrayReader {
173173
array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
174174
}
175175

176+
// SAFETY: FIXME: document why this call is safe.
176177
let array_data = unsafe { array_data_builder.build_unchecked() };
177178
Ok(Arc::new(StructArray::from(array_data)))
178179
}

parquet/src/arrow/buffer/dictionary_buffer.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ impl<K: ArrowNativeType + Ord, V: OffsetSizeTrait> DictionaryBuffer<K, V> {
162162

163163
let data = match cfg!(debug_assertions) {
164164
true => builder.build().unwrap(),
165+
// SAFETY: FIXME: this is unsound. data_type is passed by the caller without
166+
// any validation, which might result in creating invalid arrays, and this is a
167+
// safe function which cannot have preconditions. Similar considerations apply
168+
// to null_buffer.
165169
false => unsafe { builder.build_unchecked() },
166170
};
167171

parquet/src/arrow/buffer/offset_buffer.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
146146

147147
let data = match cfg!(debug_assertions) {
148148
true => array_data_builder.build().unwrap(),
149+
// SAFETY: FIXME: this is unsound. data_type is passed by the caller without
150+
// any validation, which might result in creating invalid arrays, and this is a
151+
// safe function which cannot have preconditions. Similar considerations apply
152+
// to null_buffer.
149153
false => unsafe { array_data_builder.build_unchecked() },
150154
};
151155

@@ -164,7 +168,8 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
164168
let len = (end - start).to_usize().unwrap();
165169

166170
if len != 0 {
167-
// Safety: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType
171+
// SAFETY: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType.
172+
// This follows from `self` being itself valid.
168173
unsafe {
169174
builder.append_view_unchecked(block, start.as_usize() as u32, len as u32);
170175
}

0 commit comments

Comments
 (0)