Commit 3c5d3f5
Add Parquet Modular encryption support (write) (#7111)
* Start encryption
* Work
* Work
* Pass Encryptor to SerializedRowGroupWriter
* Expand test, pass FileEncryptionProperties instead of FileEncryptor to SerializedPageWriter, encrypt pages
* Add encrypt_object helper
* Implement serialization of column crypto metadata
* Fix writing Parquet magic bytes
* Add key metadata to file encryption properties
* Generate unique file aad and add prefix if set
* Add aad param to encrypt_object and don't require TrackedWrite
* Write file crypto metadata
* Set column crypto metadata
* Store file_aad and aad_file_unique in FileEncryptor
* Work towards using correct AADs
* Fix writing ciphertext length
* Add check of ciphertext length
* Ugly workaround for setting compressed page size
* Fix test logic
* Add page_ordinal
* Add some feature flags
* Add page_ordinal, row_group_ordinal and column_ordinal to SerializedPageWriter
* minor changes
* clippy fixes
* Encapsulate page encryption context in a PageEncryptor struct
* SerializedRowGroupWriter.column_index starts at 1 not 0
* Fix handling dictionary pages and update test
* Fix clippy issues
* clippy
* Use PageEncryptor in ArrowPageWriter
* Tidy up feature handling and reduce duplication
* Test fixes
* Fix setting Arrow page writer for byte typed columns
* WIP Add per-column encryption keys
* Add test_non_uniform_encryption
* lint
* lint
* Add SchemaRef to ArrowColumnWriterFactory to get column_path via column_index
* Get column path from descriptor rather than Arrow schema
* Fix writing multiple encrypted pages with ArrowPageWriter
* Return encryptors as a Result<Box<dyn BlockEncryptor>>
* Get per-column encryption working and various tidy ups
* Handle non-encrypted columns
* Tidy up some duplication
* Add encryption_util module for tests (a usage sketch of the new write API follows this list)
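The bullets above trace the write-path implementation added by this commit. As a rough orientation, a minimal write-side usage sketch follows; it assumes uniform (footer-key) encryption and a `with_file_encryption_properties` method on the writer-properties builder, neither of which is shown verbatim in the diffs below, so treat the method names and key values as illustrative.

```rust
// Minimal write-side sketch, assuming uniform (footer-key) encryption.
// `with_file_encryption_properties` is an assumed builder method name;
// per-column keys are also supported (see the test diffs below), but the
// exact call shape is omitted here. The key value is illustrative only.
use parquet::arrow::ArrowWriter;
use parquet::encryption::encrypt::FileEncryptionProperties;
use parquet::file::properties::WriterProperties;

fn write_encrypted(
    file: std::fs::File,
    batch: &arrow_array::RecordBatch,
) -> parquet::errors::Result<()> {
    let footer_key = b"0123456789012345".to_vec(); // 16-byte AES-128-GCM key
    let encryption_properties = FileEncryptionProperties::builder(footer_key).build()?;
    let props = WriterProperties::builder()
        .with_file_encryption_properties(encryption_properties)
        .build();
    let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(props))?;
    writer.write(batch)?;
    writer.close()?;
    Ok(())
}
```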
diff --git c/parquet/src/arrow/arrow_writer/mod.rs i/parquet/src/arrow/arrow_writer/mod.rs
index 7f37401..92727f7 100644
--- c/parquet/src/arrow/arrow_writer/mod.rs
+++ i/parquet/src/arrow/arrow_writer/mod.rs
@@ -1290,8 +1290,6 @@ mod tests {
use std::fs::File;
- #[cfg(feature = "encryption")]
- use crate::arrow::arrow_reader::tests::verify_encryption_test_file_read;
#[cfg(feature = "encryption")]
use crate::arrow::arrow_reader::ArrowReaderMetadata;
#[cfg(feature = "encryption")]
@@ -1300,6 +1298,8 @@ mod tests {
use crate::arrow::ARROW_SCHEMA_META_KEY;
#[cfg(feature = "encryption")]
use crate::encryption::encrypt::EncryptionKey;
+ #[cfg(feature = "encryption")]
+ use crate::util::test_common::encryption_util::verify_encryption_test_file_read;
use arrow::datatypes::ToByteSlice;
use arrow::datatypes::{DataType, Schema};
use arrow::error::Result as ArrowResult;
* Add uniform encryption test
* lint
diff --git c/parquet/tests/arrow_reader/encryption_util.rs i/parquet/tests/arrow_reader/encryption_util.rs
index 627ac66..604cd30 100644
--- c/parquet/tests/arrow_reader/encryption_util.rs
+++ i/parquet/tests/arrow_reader/encryption_util.rs
@@ -15,13 +15,17 @@
// specific language governing permissions and limitations
// under the License.
+use crate::arrow::arrow_reader::{
+ ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
+};
use arrow_array::cast::AsArray;
use arrow_array::{types, RecordBatch};
-use futures::TryStreamExt;
+
use std::fs::File;
use parquet::file::metadata::ParquetMetaData;
use crate::arrow::ArrowWriter;
use crate::encryption::encrypt::FileEncryptionProperties;
+use crate::encryption::decrypt::FileDecryptionProperties;
use crate::file::properties::WriterProperties;
/// Tests reading an encrypted file from the parquet-testing repository
@@ -109,18 +113,24 @@ pub fn verify_encryption_test_data(record_batches: Vec<RecordBatch>, metadata: &
}
#[cfg(feature = "encryption")]
-pub fn read_and_roundtrip_to_encrypted_file(path: &str, decryption_properties: FileDecryptionProperties, encryption_properties: FileEncryptionProperties) {
+pub fn read_and_roundtrip_to_encrypted_file(
+ path: &str,
+ decryption_properties: FileDecryptionProperties,
+ encryption_properties: FileEncryptionProperties,
+) {
let temp_file = tempfile::tempfile().unwrap();
// read example data
let file = File::open(path).unwrap();
- let options =
- ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties.clone());
+ let options = ArrowReaderOptions::default()
+ .with_file_decryption_properties(decryption_properties.clone());
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
let batch_reader = builder.build().unwrap();
- let batches = batch_reader.collect::<crate::errors::Result<Vec<RecordBatch>, _>>().unwrap();
+ let batches = batch_reader
+ .collect::<crate::errors::Result<Vec<RecordBatch>, _>>()
+ .unwrap();
// write example data
let props = WriterProperties::builder()
* post rebase
diff --git c/parquet/README.md i/parquet/README.md
index 9245664..8fc72bf 100644
--- c/parquet/README.md
+++ i/parquet/README.md
@@ -84,7 +84,7 @@ The `parquet` crate provides the following features which may be enabled in your
- [ ] Row record writer
- [x] Arrow record writer
- [x] Async support
- - [ ] Encrypted files
+ - [x] Encrypted files
- [x] Predicate pushdown
- [x] Parquet format 4.0.0 support
diff --git c/parquet/examples/read_with_rowgroup.rs i/parquet/examples/read_with_rowgroup.rs
index 44d2559..8cccc7f 100644
--- c/parquet/examples/read_with_rowgroup.rs
+++ i/parquet/examples/read_with_rowgroup.rs
@@ -35,12 +35,7 @@ async fn main() -> Result<()> {
let mut file = File::open(&path).await.unwrap();
// The metadata could be cached in other places, this example only shows how to read
- let metadata = file
- .get_metadata(
- #[cfg(feature = "encryption")]
- None,
- )
- .await?;
+ let metadata = file.get_metadata().await?;
for rg in metadata.row_groups() {
let mut rowgroup = InMemoryRowGroup::create(rg.clone(), ProjectionMask::all());
@@ -126,8 +121,6 @@ impl RowGroups for InMemoryRowGroup {
self.metadata.column(i),
self.num_rows(),
None,
- #[cfg(feature = "encryption")]
- None,
)?);
Ok(Box::new(ColumnChunkIterator {
diff --git c/parquet/src/arrow/arrow_writer/mod.rs i/parquet/src/arrow/arrow_writer/mod.rs
index 0e087b1..527e79e 100644
--- c/parquet/src/arrow/arrow_writer/mod.rs
+++ i/parquet/src/arrow/arrow_writer/mod.rs
@@ -1311,9 +1311,7 @@ mod tests {
use crate::basic::Encoding;
use crate::data_type::AsBytes;
#[cfg(feature = "encryption")]
- use crate::encryption::{
- decryption::FileDecryptionProperties, encrypt::FileEncryptionProperties,
- };
+ use crate::encryption::{decrypt::FileDecryptionProperties, encrypt::FileEncryptionProperties};
use crate::file::metadata::ParquetMetaData;
use crate::file::page_index::index::Index;
use crate::file::page_index::index_reader::read_offset_indexes;
@@ -3812,8 +3810,8 @@ mod tests {
let column_2_key = "1234567890123451".as_bytes();
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
- .with_column_key("double_field".as_bytes().to_vec(), column_1_key.to_vec())
- .with_column_key("float_field".as_bytes().to_vec(), column_2_key.to_vec())
+ .with_column_key("double_field", column_1_key.to_vec())
+ .with_column_key("float_field", column_2_key.to_vec())
.build()
.unwrap();
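For reference, the read side exercised by these tests looks roughly like the sketch below. The builder calls are taken from the hunks above; the file path and key values are placeholders.

```rust
// Sketch of the decryption path used by the round-trip tests above; the
// builder calls match the diffs, while the path and keys are placeholders.
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
use parquet::encryption::decrypt::FileDecryptionProperties;
use std::fs::File;

fn read_encrypted(path: &str) -> Vec<arrow_array::RecordBatch> {
    let footer_key = b"0123456789012345".to_vec();
    let decryption_properties = FileDecryptionProperties::builder(footer_key)
        .with_column_key("double_field", b"1234567890123450".to_vec())
        .with_column_key("float_field", b"1234567890123451".to_vec())
        .build()
        .unwrap();
    let options =
        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
    let file = File::open(path).unwrap();
    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
    let batch_reader = builder.build().unwrap();
    batch_reader.map(|batch| batch.unwrap()).collect()
}
```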
diff --git c/parquet/src/arrow/async_reader/metadata.rs i/parquet/src/arrow/async_reader/metadata.rs
index 4f41cd6..71d2e57 100644
--- c/parquet/src/arrow/async_reader/metadata.rs
+++ i/parquet/src/arrow/async_reader/metadata.rs
@@ -128,26 +128,13 @@ impl<F: MetadataFetch> MetadataLoader<F> {
let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
let metadata_start = file_size - length - FOOTER_SIZE;
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
- (
- ParquetMetaDataReader::decode_metadata(
- &meta,
- footer.is_encrypted_footer(),
- #[cfg(feature = "encryption")]
- None,
- )?,
- None,
- )
+ (ParquetMetaDataReader::decode_metadata(&meta)?, None)
} else {
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
(
- ParquetMetaDataReader::decode_metadata(
- slice,
- footer.is_encrypted_footer(),
- #[cfg(feature = "encryption")]
- None,
- )?,
+ ParquetMetaDataReader::decode_metadata(slice)?,
Some((footer_start, suffix.slice(..metadata_start))),
)
};
diff --git c/parquet/src/column/writer/mod.rs i/parquet/src/column/writer/mod.rs
index 32139e6..91259ab 100644
--- c/parquet/src/column/writer/mod.rs
+++ i/parquet/src/column/writer/mod.rs
@@ -1542,9 +1542,7 @@ mod tests {
reader::{get_column_reader, get_typed_column_reader, ColumnReaderImpl},
};
#[cfg(feature = "encryption")]
- use crate::encryption::{
- decryption::FileDecryptionProperties, encrypt::FileEncryptionProperties,
- };
+ use crate::encryption::{decrypt::FileDecryptionProperties, encrypt::FileEncryptionProperties};
use crate::file::writer::TrackedWrite;
use crate::file::{
properties::ReaderProperties, reader::SerializedPageReader, writer::SerializedPageWriter,
@@ -2126,8 +2124,6 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap();
@@ -2180,8 +2176,6 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap();
@@ -2317,8 +2311,6 @@ mod tests {
r.rows_written as usize,
None,
Arc::new(props),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap(),
);
@@ -3543,7 +3535,7 @@ mod tests {
let _file_metadata = writer.close().unwrap();
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
- .with_column_key(b"a".to_vec(), column_key.key().clone())
+ .with_column_key("a", column_key.key().clone())
.build()
.unwrap();
let options = ArrowReaderOptions::default()
@@ -3955,8 +3947,6 @@ mod tests {
result.rows_written as usize,
None,
Arc::new(props),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap(),
);
diff --git c/parquet/src/encryption/ciphers.rs i/parquet/src/encryption/ciphers.rs
index 88ec2a7..9c6ce23 100644
--- c/parquet/src/encryption/ciphers.rs
+++ i/parquet/src/encryption/ciphers.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use crate::errors::ParquetError;
use crate::errors::ParquetError::General;
use crate::errors::Result;
use ring::aead::{Aad, LessSafeKey, NonceSequence, UnboundKey, AES_128_GCM};
@@ -167,7 +168,7 @@ mod tests {
fn test_round_trip() {
let key = [0u8; 16];
let mut encryptor = RingGcmBlockEncryptor::new(&key).unwrap();
- let decryptor = RingGcmBlockDecryptor::new(&key);
+ let decryptor = RingGcmBlockDecryptor::new(&key).unwrap();
let plaintext = b"hello, world!";
let aad = b"some aad";
diff --git c/parquet/src/encryption/decryption.rs i/parquet/src/encryption/decryption.rs
deleted file mode 100644
index 2c789d0..000000000
--- c/parquet/src/encryption/decryption.rs
+++ /dev/null
@@ -1,256 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor};
-use crate::encryption::modules::{create_module_aad, ModuleType};
-use crate::errors::Result;
-use std::collections::HashMap;
-use std::io::Read;
-use std::sync::Arc;
-
-pub fn read_and_decrypt<T: Read>(
- decryptor: &Arc<dyn BlockDecryptor>,
- input: &mut T,
- aad: &[u8],
-) -> Result<Vec<u8>> {
- let mut len_bytes = [0; 4];
- input.read_exact(&mut len_bytes)?;
- let ciphertext_len = u32::from_le_bytes(len_bytes) as usize;
- let mut ciphertext = vec![0; 4 + ciphertext_len];
- ciphertext[0..4].copy_from_slice(&len_bytes);
- input.read_exact(&mut ciphertext[4..])?;
-
- decryptor.decrypt(&ciphertext, aad.as_ref())
-}
-
-#[derive(Debug, Clone)]
-pub struct CryptoContext {
- pub(crate) row_group_ordinal: usize,
- pub(crate) column_ordinal: usize,
- pub(crate) page_ordinal: Option<usize>,
- pub(crate) dictionary_page: bool,
- // We have separate data and metadata decryptors because
- // in GCM CTR mode, the metadata and data pages use
- // different algorithms.
- data_decryptor: Arc<dyn BlockDecryptor>,
- metadata_decryptor: Arc<dyn BlockDecryptor>,
- file_aad: Vec<u8>,
-}
-
-impl CryptoContext {
- pub fn new(
- row_group_ordinal: usize,
- column_ordinal: usize,
- data_decryptor: Arc<dyn BlockDecryptor>,
- metadata_decryptor: Arc<dyn BlockDecryptor>,
- file_aad: Vec<u8>,
- ) -> Self {
- Self {
- row_group_ordinal,
- column_ordinal,
- page_ordinal: None,
- dictionary_page: false,
- data_decryptor,
- metadata_decryptor,
- file_aad,
- }
- }
-
- pub fn with_page_ordinal(&self, page_ordinal: usize) -> Self {
- Self {
- row_group_ordinal: self.row_group_ordinal,
- column_ordinal: self.column_ordinal,
- page_ordinal: Some(page_ordinal),
- dictionary_page: false,
- data_decryptor: self.data_decryptor.clone(),
- metadata_decryptor: self.metadata_decryptor.clone(),
- file_aad: self.file_aad.clone(),
- }
- }
-
- pub(crate) fn create_page_header_aad(&self) -> Result<Vec<u8>> {
- let module_type = if self.dictionary_page {
- ModuleType::DictionaryPageHeader
- } else {
- ModuleType::DataPageHeader
- };
-
- create_module_aad(
- self.file_aad(),
- module_type,
- self.row_group_ordinal,
- self.column_ordinal,
- self.page_ordinal,
- )
- }
-
- pub(crate) fn create_page_aad(&self) -> Result<Vec<u8>> {
- let module_type = if self.dictionary_page {
- ModuleType::DictionaryPage
- } else {
- ModuleType::DataPage
- };
-
- create_module_aad(
- self.file_aad(),
- module_type,
- self.row_group_ordinal,
- self.column_ordinal,
- self.page_ordinal,
- )
- }
-
- pub fn for_dictionary_page(&self) -> Self {
- Self {
- row_group_ordinal: self.row_group_ordinal,
- column_ordinal: self.column_ordinal,
- page_ordinal: self.page_ordinal,
- dictionary_page: true,
- data_decryptor: self.data_decryptor.clone(),
- metadata_decryptor: self.metadata_decryptor.clone(),
- file_aad: self.file_aad.clone(),
- }
- }
-
- pub fn data_decryptor(&self) -> &Arc<dyn BlockDecryptor> {
- &self.data_decryptor
- }
-
- pub fn metadata_decryptor(&self) -> &Arc<dyn BlockDecryptor> {
- &self.metadata_decryptor
- }
-
- pub fn file_aad(&self) -> &Vec<u8> {
- &self.file_aad
- }
-}
-
-/// FileDecryptionProperties hold keys and AAD data required to decrypt a Parquet file.
-#[derive(Debug, Clone, PartialEq)]
-pub struct FileDecryptionProperties {
- footer_key: Vec<u8>,
- column_keys: Option<HashMap<Vec<u8>, Vec<u8>>>,
- aad_prefix: Option<Vec<u8>>,
-}
-
-impl FileDecryptionProperties {
- /// Returns a new FileDecryptionProperties builder
- pub fn builder(footer_key: Vec<u8>) -> DecryptionPropertiesBuilder {
- DecryptionPropertiesBuilder::new(footer_key)
- }
-}
-
-pub struct DecryptionPropertiesBuilder {
- footer_key: Vec<u8>,
- column_keys: Option<HashMap<Vec<u8>, Vec<u8>>>,
- aad_prefix: Option<Vec<u8>>,
-}
-
-impl DecryptionPropertiesBuilder {
- pub fn new(footer_key: Vec<u8>) -> DecryptionPropertiesBuilder {
- Self {
- footer_key,
- column_keys: None,
- aad_prefix: None,
- }
- }
-
- pub fn build(self) -> Result<FileDecryptionProperties> {
- Ok(FileDecryptionProperties {
- footer_key: self.footer_key,
- column_keys: self.column_keys,
- aad_prefix: self.aad_prefix,
- })
- }
-
- pub fn with_aad_prefix(mut self, value: Vec<u8>) -> Self {
- self.aad_prefix = Some(value);
- self
- }
-
- pub fn with_column_key(mut self, column_name: Vec<u8>, decryption_key: Vec<u8>) -> Self {
- let mut column_keys = self.column_keys.unwrap_or_default();
- column_keys.insert(column_name, decryption_key);
- self.column_keys = Some(column_keys);
- self
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct FileDecryptor {
- decryption_properties: FileDecryptionProperties,
- footer_decryptor: Option<Arc<dyn BlockDecryptor>>,
- file_aad: Vec<u8>,
-}
-
-impl PartialEq for FileDecryptor {
- fn eq(&self, other: &Self) -> bool {
- self.decryption_properties == other.decryption_properties
- }
-}
-
-impl FileDecryptor {
- pub(crate) fn new(
- decryption_properties: &FileDecryptionProperties,
- aad_file_unique: Vec<u8>,
- aad_prefix: Vec<u8>,
- ) -> Self {
- let file_aad = [aad_prefix.as_slice(), aad_file_unique.as_slice()].concat();
- let footer_decryptor = RingGcmBlockDecryptor::new(&decryption_properties.footer_key);
-
- Self {
- // todo decr: if no key available yet (not set in properties, will be retrieved from metadata)
- footer_decryptor: Some(Arc::new(footer_decryptor)),
- decryption_properties: decryption_properties.clone(),
- file_aad,
- }
- }
-
- pub(crate) fn get_footer_decryptor(&self) -> Arc<dyn BlockDecryptor> {
- self.footer_decryptor.clone().unwrap()
- }
-
- pub(crate) fn get_column_data_decryptor(&self, column_name: &[u8]) -> Arc<dyn BlockDecryptor> {
- match self.decryption_properties.column_keys.as_ref() {
- None => self.get_footer_decryptor(),
- Some(column_keys) => match column_keys.get(column_name) {
- None => self.get_footer_decryptor(),
- Some(column_key) => Arc::new(RingGcmBlockDecryptor::new(column_key)),
- },
- }
- }
-
- pub(crate) fn get_column_metadata_decryptor(
- &self,
- column_name: &[u8],
- ) -> Arc<dyn BlockDecryptor> {
- // Once GCM CTR mode is implemented, data and metadata decryptors may be different
- self.get_column_data_decryptor(column_name)
- }
-
- pub(crate) fn file_aad(&self) -> &Vec<u8> {
- &self.file_aad
- }
-
- pub(crate) fn is_column_encrypted(&self, column_name: &[u8]) -> bool {
- // Column is encrypted if either uniform encryption is used or an encryption key is set for the column
- match self.decryption_properties.column_keys.as_ref() {
- None => true,
- Some(keys) => keys.contains_key(column_name),
- }
- }
-}
diff --git c/parquet/src/file/footer.rs i/parquet/src/file/footer.rs
index 5be0842..85ef30c 100644
--- c/parquet/src/file/footer.rs
+++ i/parquet/src/file/footer.rs
@@ -58,12 +58,7 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> {
- ParquetMetaDataReader::decode_metadata(
- buf,
- false,
- #[cfg(feature = "encryption")]
- None,
- )
+ ParquetMetaDataReader::decode_metadata(buf)
}
/// Decodes the Parquet footer returning the metadata length in bytes
diff --git c/parquet/src/file/metadata/mod.rs i/parquet/src/file/metadata/mod.rs
index 6a5eb28..6dc559c 100644
--- c/parquet/src/file/metadata/mod.rs
+++ i/parquet/src/file/metadata/mod.rs
@@ -103,14 +103,17 @@ use crate::encryption::{
};
use crate::errors::{ParquetError, Result};
#[cfg(feature = "encryption")]
-use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData};
+use crate::file::column_crypto_metadata::ColumnCryptoMetaData;
+#[cfg(feature = "encryption")]
+use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData;
+
+#[cfg(feature = "encryption")]
+use crate::file::column_crypto_metadata;
pub(crate) use crate::file::metadata::memory::HeapSize;
use crate::file::page_encoding_stats::{self, PageEncodingStats};
use crate::file::page_index::index::Index;
use crate::file::page_index::offset_index::OffsetIndexMetaData;
use crate::file::statistics::{self, Statistics};
-#[cfg(feature = "encryption")]
-use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData;
use crate::format::{
BoundaryOrder, ColumnChunk, ColumnIndex, ColumnMetaData, OffsetIndex, PageLocation, RowGroup,
SizeStatistics, SortingColumn,
@@ -659,11 +662,11 @@ impl RowGroupMetaData {
d.path().string()
));
}
- Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => {
+ Some(TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => {
let column_name = crypto_metadata.path_in_schema.join(".");
decryptor.get_column_metadata_decryptor(column_name.as_str())?
}
- Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
+ Some(TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
decryptor.get_footer_decryptor()?
}
};
diff --git c/parquet/src/file/writer.rs i/parquet/src/file/writer.rs
index 36c75d3..9408982 100644
--- c/parquet/src/file/writer.rs
+++ i/parquet/src/file/writer.rs
@@ -1467,8 +1467,6 @@ mod tests {
total_num_values as usize,
None,
Arc::new(props),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap();
diff --git c/parquet/tests/arrow_reader/encryption_util.rs i/parquet/tests/arrow_reader/encryption_util.rs
index 604cd30..6dea557 100644
--- c/parquet/tests/arrow_reader/encryption_util.rs
+++ i/parquet/tests/arrow_reader/encryption_util.rs
@@ -18,15 +18,20 @@
use crate::arrow::arrow_reader::{
ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};
+use crate::arrow::ParquetRecordBatchStreamBuilder;
use arrow_array::cast::AsArray;
use arrow_array::{types, RecordBatch};
use std::fs::File;
use parquet::file::metadata::ParquetMetaData;
use crate::arrow::ArrowWriter;
-use crate::encryption::encrypt::FileEncryptionProperties;
use crate::encryption::decrypt::FileDecryptionProperties;
+use crate::encryption::encrypt::FileEncryptionProperties;
+use crate::errors::ParquetError;
+use crate::file::metadata::FileMetaData;
use crate::file::properties::WriterProperties;
+use futures::TryStreamExt;
+use std::fs::File;
/// Tests reading an encrypted file from the parquet-testing repository
pub(crate) fn verify_encryption_test_file_read(
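The imports added in this hunk (`ParquetRecordBatchStreamBuilder`, `futures::TryStreamExt`) support an async round-trip check. A rough sketch of that async read path follows; the `new_with_options` constructor and the `tokio::fs::File` input are assumptions not visible in this hunk.

```rust
// Rough sketch of async encrypted reading implied by the imports above.
// `new_with_options` and the tokio::fs::File input are assumptions.
use futures::TryStreamExt;
use parquet::arrow::arrow_reader::ArrowReaderOptions;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
use parquet::encryption::decrypt::FileDecryptionProperties;

async fn read_encrypted_async(
    path: &str,
    decryption_properties: FileDecryptionProperties,
) -> parquet::errors::Result<Vec<arrow_array::RecordBatch>> {
    let file = tokio::fs::File::open(path).await?;
    let options =
        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
    let builder = ParquetRecordBatchStreamBuilder::new_with_options(file, options).await?;
    let stream = builder.build()?;
    let batches = stream.try_collect().await?;
    Ok(batches)
}
```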
diff --git c/parquet/tests/arrow_writer_layout.rs i/parquet/tests/arrow_writer_layout.rs
index 9297b8d..9a66d13 100644
--- c/parquet/tests/arrow_writer_layout.rs
+++ i/parquet/tests/arrow_writer_layout.rs
@@ -141,8 +141,6 @@ fn assert_layout(file_reader: &Bytes, meta: &ParquetMetaData, layout: &Layout) {
row_group.num_rows() as usize,
None,
Arc::new(properties),
- #[cfg(feature = "encryption")]
- None,
)
.unwrap();
* Check if columns to encrypt are in schema
* Apply suggestions from code review
Co-authored-by: Adam Reeve <[email protected]>
* Move tests to tests/. Post rebase fixes.
* Review feedback
* Review feedback
* Minor changes
* Raise if writing plaintext footer
* Docs for crypto methods
* More practical key API
* Refactor PageEncryptor use
* Simplify with_new_compressed_buffer method
* Apply suggestions from code review
Co-authored-by: Adam Reeve <[email protected]>
* Review feedback
* Lint and remove redundant test.
* Docs
* Add async writer test for encrypted data
* Test struct array encryption, column name with '.'
* Review feedback
* First round of changes, add accessors and return result for encryption builder.
* Add:
1. Fix broken defaults.
2. Improve error messages.
3. Allow return of non-encrypted metadata (for now).
* Update parquet/src/encryption/encrypt.rs
Co-authored-by: Adam Reeve <[email protected]>
* Move encryption tests
* Back out change to arrow/async_reader/mod.rs. TODO: put this in a separate PR.
* Update notes on changes to writer.rs
* Fix struct array encryption
* Minor fixes
* Lint
* Fix test
* Add '.' to struct array name
* Fix required features for encryption tests
These need Arrow but don't all need the encryption feature
* Fix reading encrypted struct columns and writer test
* Tidy ups
* Remove unnecessary clone of all row group metadata in unencrypted case
* Remove overly broad error remapping
* Tidy up duplicated test function
* Suppress unused mut error
* Re-use block encryptors in PageEncryptor
* Slightly update error message for missing column key.
* Refactor PageEncryptor to reduce use of cfg(feature)
* Refactor PageEncryptor construction in SerializedPageWriter
* Reduce use of inline #[cfg(feature = "encryption")]
* Refactor ThriftMetadataWriter to reduce use of feature checks within functions
* Tidy ups
* Refactor ArrowRowGroupWriter creation
* Make pub(crate) more explicit on some structs
* Check for length mismatch in with_column_keys (see the sketch after this list)
* Comment and error message tidy ups
* Add test to verify column statistics are usable after write
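A hedged sketch of the batch column-key setter referenced in the length-mismatch item above; the exact signature (parallel vectors of column names and keys, returning a `Result`) is assumed rather than shown in the diffs.

```rust
// Sketch only: `with_column_keys` is assumed to take parallel lists of
// column names and keys and to fail when their lengths differ.
use parquet::encryption::encrypt::FileEncryptionProperties;

fn build_props() -> parquet::errors::Result<FileEncryptionProperties> {
    let footer_key = b"0123456789012345".to_vec();
    FileEncryptionProperties::builder(footer_key)
        .with_column_keys(
            vec!["double_field", "float_field"],
            vec![
                b"1234567890123450".to_vec(),
                b"1234567890123451".to_vec(),
            ],
        )?
        .build()
}
```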
---------
Co-authored-by: Gidon Gershinsky <[email protected]>
Co-authored-by: Adam Reeve <[email protected]>
Co-authored-by: Corwin Joy <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
File tree: 27 files changed, +2553 −383 lines

- parquet
  - src
    - arrow/arrow_writer
    - column/writer
    - encryption
    - file
      - metadata
  - tests
    - arrow_reader
    - encryption