-
Notifications
You must be signed in to change notification settings - Fork 1k
Open
Labels
Description
Describe the bug
It is unclear to me whether this is an issue when building or when checking the Bloom filter; either way, a Bloom filter built with `i8` or `i16` values (as opposed to `i32` or `i64`) always returns `false` when checked.
To Reproduce
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 18c8617e07..d6b14e2899 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -2039,6 +2039,36 @@ mod tests {
values_required::<BinaryArray, _>(many_vecs_iter);
}
+ #[test]
+ fn i8_column_bloom_filter() {
+ let array = Arc::new(Int8Array::from_iter(0..SMALL_SIZE as i8));
+ let mut options = RoundTripOptions::new(array, false);
+ options.bloom_filter = true;
+
+ let files = one_column_roundtrip_with_options(options);
+ check_bloom_filter(
+ files,
+ "col".to_string(),
+ (0..SMALL_SIZE as i8).collect(),
+ (SMALL_SIZE as i8 + 1..SMALL_SIZE as i8 + 10).collect(),
+ );
+ }
+
+ #[test]
+ fn i16_column_bloom_filter() {
+ let array = Arc::new(Int16Array::from_iter(0..SMALL_SIZE as i16));
+ let mut options = RoundTripOptions::new(array, false);
+ options.bloom_filter = true;
+
+ let files = one_column_roundtrip_with_options(options);
+ check_bloom_filter(
+ files,
+ "col".to_string(),
+ (0..SMALL_SIZE as i16).collect(),
+ (SMALL_SIZE as i16 + 1..SMALL_SIZE as i16 + 10).collect(),
+ );
+ }
+
#[test]
fn i32_column_bloom_filter() {
let array = Arc::new(Int32Array::from_iter(0..SMALL_SIZE as i32));
@@ -2054,6 +2084,21 @@ mod tests {
);
}
+ #[test]
+ fn i64_column_bloom_filter() {
+ let array = Arc::new(Int64Array::from_iter(0..SMALL_SIZE as i64));
+ let mut options = RoundTripOptions::new(array, false);
+ options.bloom_filter = true;
+
+ let files = one_column_roundtrip_with_options(options);
+ check_bloom_filter(
+ files,
+ "col".to_string(),
+ (0..SMALL_SIZE as i64).collect(),
+ (SMALL_SIZE as i64 + 1..SMALL_SIZE as i64 + 10).collect(),
+ );
+ }
+
#[test]
fn binary_column_bloom_filter() {
let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
Running the tests with this patch applied returns:
failures:
---- arrow::arrow_writer::tests::i16_column_bloom_filter stdout ----
thread 'arrow::arrow_writer::tests::i16_column_bloom_filter' panicked at parquet/src/arrow/arrow_writer/mod.rs:1792:17:
Value [0, 0] should be in bloom filter
---- arrow::arrow_writer::tests::i8_column_bloom_filter stdout ----
thread 'arrow::arrow_writer::tests::i8_column_bloom_filter' panicked at parquet/src/arrow/arrow_writer/mod.rs:1792:17:
Value [0] should be in bloom filter
failures:
arrow::arrow_writer::tests::i16_column_bloom_filter
arrow::arrow_writer::tests::i8_column_bloom_filter
Expected behavior
These tests should pass: every `i8` and `i16` value that was written should be reported as present in the Bloom filter.
Additional context
I found this via DataFusion: apache/datafusion#9779