diff --git a/src/encoding.rs b/src/encoding.rs index 6a81599..38ab5ab 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -395,7 +395,7 @@ mod tests { // Convert the RecordBatch to a string for comparison let batch_string = record_batch_to_string(&batch); assert_eq!(batch.num_rows(), 2); - println!("{}", batch_string); + println!("{batch_string}"); // Define the expected output let expected_output = r#"id: 1, 2 uuid: guid-key1, guid-key2 @@ -474,7 +474,7 @@ name: name1, name2 // Convert the RecordBatch to a string for comparison let batch_string = record_batch_to_string(&batch); assert_eq!(batch.num_rows(), 2); - println!("{}", batch_string); + println!("{batch_string}"); // Define the expected output let expected_output = r#"id: 1, 2 uuid: guid-key1, guid-key2 diff --git a/src/lib.rs b/src/lib.rs index caa55e1..a3427c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,13 +22,13 @@ limitations under the License. //! ## Features //! //! - **Verifiability**: The cryptographic hashing in Prolly Trees ensures data integrity and allows for -//! verifiable proofs of inclusion/exclusion. +//! verifiable proofs of inclusion/exclusion. //! - **Performance**: The balanced tree structure provides efficient data access patterns similar to -//! B-trees, ensuring high performance for both random and sequential access. +//! B-trees, ensuring high performance for both random and sequential access. //! - **Scalability**: Prolly Trees are suitable for large-scale applications, providing efficient index maintenance -//! and data distribution capabilities. +//! and data distribution capabilities. //! - **Flexibility**: The probabilistic balancing allows for handling various mutation patterns without degrading -//! performance or structure. +//! performance or structure. //! //! ## Usage //! diff --git a/src/main.rs b/src/main.rs index d39a66f..cf6d434 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,8 +79,8 @@ fn main() { "Proof for key \x1b[32m{:?}\x1b[0m in increasing order is valid: {}", keys[i], is_valid ); - println!("Proof: {:#?}", proof); // Assuming Debug trait is implemented - // Sleep for 2 seconds + println!("Proof: {proof:#?}"); // Assuming Debug trait is implemented + // Sleep for 2 seconds sleep(Duration::from_millis(200)); } @@ -119,8 +119,8 @@ fn main() { "Proof for key \x1b[32m{:?}\x1b[0m in reverse order is valid: {}", keys[i], is_valid ); - println!("Proof: {:#?}", proof); // Assuming Debug trait is implemented - // Sleep for 2 seconds + println!("Proof: {proof:#?}"); // Assuming Debug trait is implemented + // Sleep for 2 seconds sleep(Duration::from_millis(200)); } } diff --git a/src/node.rs b/src/node.rs index a4a2320..db7bb17 100644 --- a/src/node.rs +++ b/src/node.rs @@ -261,56 +261,29 @@ impl ProllyNodeBuilder { } } -impl ProllyNode { - pub fn init_root(key: Vec, value: Vec) -> Self { - ProllyNode { - keys: vec![key], - values: vec![value], - is_leaf: true, - level: INIT_LEVEL, - ..Default::default() - } - } - - pub fn builder() -> ProllyNodeBuilder { - ProllyNodeBuilder::default() - } - - pub fn formatted_traverse_3(&self, storage: &impl NodeStorage, formatter: F) -> String - where - F: Fn(&ProllyNode, &str, bool) -> String, - { - fn traverse_node, F>( - node: &ProllyNode, - storage: &S, - formatter: &F, - prefix: &str, - is_last: bool, - output: &mut String, - ) where - F: Fn(&ProllyNode, &str, bool) -> String, - { - *output += &formatter(node, prefix, is_last); +/// Trait for balancing nodes in the tree. +/// This trait provides methods for splitting and merging nodes to maintain tree balance. +trait Balanced { + /// Balances the node by splitting or merging it as needed. + fn balance>( + &mut self, + storage: &mut S, + is_root_node: bool, + path_hashes: &[ValueDigest], + ); - let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); - let children = node.children(storage); - for (i, child) in children.iter().enumerate() { - traverse_node( - child, - storage, - formatter, - &new_prefix, - i == children.len() - 1, - output, - ); - } - } + /// Gets the hash of the next sibling of the node. + fn get_next_sibling_hash>( + &self, + storage: &S, + path_hashes: &[ValueDigest], + ) -> Option>; - let mut output = String::new(); - traverse_node(self, storage, &formatter, "", true, &mut output); - output - } + /// Merges the node with its next sibling. + fn merge_with_next_sibling(&mut self, next_sibling: &mut ProllyNode); +} +impl Balanced for ProllyNode { /// Attempts to balance the node by merging the next (right) neighbor /// and then splitting it into smaller nodes if necessary. fn balance>( @@ -338,6 +311,9 @@ impl ProllyNode { } // Use chunk_content to determine split points + if self.keys.len() < self.min_chunk_size { + return; + } let chunks = self.chunk_content(); if chunks.len() <= 1 { // do not need to split the node @@ -465,8 +441,62 @@ impl ProllyNode { } } +impl ProllyNode { + pub fn init_root(key: Vec, value: Vec) -> Self { + ProllyNode { + keys: vec![key], + values: vec![value], + is_leaf: true, + level: INIT_LEVEL, + ..Default::default() + } + } + + pub fn builder() -> ProllyNodeBuilder { + ProllyNodeBuilder::default() + } + + pub fn formatted_traverse_3(&self, storage: &impl NodeStorage, formatter: F) -> String + where + F: Fn(&ProllyNode, &str, bool) -> String, + { + fn traverse_node, F>( + node: &ProllyNode, + storage: &S, + formatter: &F, + prefix: &str, + is_last: bool, + output: &mut String, + ) where + F: Fn(&ProllyNode, &str, bool) -> String, + { + *output += &formatter(node, prefix, is_last); + + let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); + let children = node.children(storage); + for (i, child) in children.iter().enumerate() { + traverse_node( + child, + storage, + formatter, + &new_prefix, + i == children.len() - 1, + output, + ); + } + } + + let mut output = String::new(); + traverse_node(self, storage, &formatter, "", true, &mut output); + output + } +} + impl NodeChunk for ProllyNode { fn chunk_content(&self) -> Vec<(usize, usize)> { + if self.keys.len() < self.min_chunk_size { + return Vec::new(); + } let mut chunks = Vec::new(); let mut start = 0; let mut last_start = 0; @@ -688,7 +718,7 @@ impl Node for ProllyNode { } } else { // Handle the case when the child node is not found - println!("Child node not found: {:?}", child_hash); + println!("Child node not found: {child_hash:?}"); } // Sort the keys and balance the node @@ -815,7 +845,7 @@ impl Node for ProllyNode { true } else { // Handle the case when the child node is not found - println!("Child node not found: {:?}", child_hash); + println!("Child node not found: {child_hash:?}"); false } } @@ -869,7 +899,7 @@ impl Node for ProllyNode { .iter() .map(|key| { key.iter() - .map(|byte| format!("{:0}", byte)) + .map(|byte| format!("{byte:0}")) .collect::>() .join(" ") }) @@ -886,16 +916,15 @@ impl Node for ProllyNode { ) } else { format!( - "{}{}#({}\x1B[31m0x{:?}\x1B[0m)[{}]\n", + "{}{}#({:?})[{}]\n", prefix, if is_last { "└── " } else { "├── " }, - "", hash, keys_str ) } }); - println!("{}", output); + println!("{output}"); println!("Note: #[keys] indicates internal node, [keys] indicates leaf node"); } } @@ -962,6 +991,7 @@ impl ProllyNode { /// * `storage` - The storage implementation to retrieve child nodes. /// * `formatter` - A closure that takes a reference to a node and returns a string representation of the node. /// + /// /// # Returns /// A string representation of the tree nodes in a breadth-first order. pub fn formatted_traverse(&self, storage: &impl NodeStorage, formatter: F) -> String @@ -1471,4 +1501,43 @@ mod tests { // Print chunk content println!("{:?}", node.chunk_content()); } + + /// This test verifies the balancing of the tree after multiple insertions. + /// The test checks the tree structure and ensures that the root node is split correctly + /// and the keys are promoted to the parent node. + #[test] + fn test_balance_after_insertions() { + let mut storage = InMemoryNodeStorage::<32>::default(); + let value_for_all = vec![100]; + + // Initialize the prolly tree with a small chunk size to trigger splits + let mut node: ProllyNode<32> = ProllyNode::builder() + .pattern(0b1) + .min_chunk_size(4) + .max_chunk_size(8) + .build(); + + // Insert key-value pairs to trigger a split + for i in 0..=10 { + node.insert(vec![i], value_for_all.clone(), &mut storage, Vec::new()); + storage.insert_node(node.get_hash(), node.clone()); + } + + // After 11 insertions, the root should not be a leaf node + assert!(!node.is_leaf); + + // Check that all keys can be found + for i in 0..=10 { + assert!(node.find(&[i], &storage).is_some()); + } + + // Insert one more key to trigger another split + node.insert(vec![11], value_for_all.clone(), &mut storage, Vec::new()); + storage.insert_node(node.get_hash(), node.clone()); + + // Check that all keys can still be found + for i in 0..=11 { + assert!(node.find(&[i], &storage).is_some()); + } + } } diff --git a/src/proof.rs b/src/proof.rs index b6e02a6..21f6bd8 100644 --- a/src/proof.rs +++ b/src/proof.rs @@ -34,9 +34,9 @@ impl fmt::Debug for Proof { .map(|digest| { let bytes = digest.as_bytes(); if bytes.len() > 8 { - format!("{:02x?}...", &bytes[..8]) + format!("{bytes:02x?}...") } else { - format!("{:02x?}", bytes) + format!("{bytes:02x?}") } }) .collect::>(), @@ -46,9 +46,9 @@ impl fmt::Debug for Proof { &self.target_hash.as_ref().map(|digest| { let bytes = digest.as_bytes(); if bytes.len() > 8 { - format!("{:02x?}...", &bytes[..8]) + format!("{bytes:02x?}...") } else { - format!("{:02x?}", bytes) + format!("{bytes:02x?}") } }), ) diff --git a/src/storage.rs b/src/storage.rs index 858544a..5d01383 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -15,7 +15,7 @@ limitations under the License. use crate::digest::ValueDigest; use crate::node::ProllyNode; use std::collections::HashMap; -use std::fmt; +use std::fmt::{Display, Formatter, LowerHex}; use std::fs::{self, File}; use std::io::{Read, Write}; use std::path::PathBuf; @@ -123,18 +123,27 @@ impl FileNodeStorage { } fn node_path(&self, hash: &ValueDigest) -> PathBuf { - self.storage_dir.join(format!("{:x}", hash)) + self.storage_dir.join(format!("{hash:x}")) } fn config_path(&self, key: &str) -> PathBuf { - self.storage_dir.join(format!("config_{}", key)) + self.storage_dir.join(format!("config_{key}")) } } -impl fmt::LowerHex for ValueDigest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for byte in &self.0 { - write!(f, "{:02x}", byte)?; +impl Display for ValueDigest { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for byte in self.0 { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} + +impl LowerHex for ValueDigest { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for byte in self.0 { + write!(f, "{byte:02x}")?; } Ok(()) } diff --git a/src/tree.rs b/src/tree.rs index f0510d2..9f6a0b9 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -593,18 +593,17 @@ mod tests { // 5. Traverse the Tree with a Custom Formatter let traversal = tree.formatted_traverse(|node| { - let keys_as_strings: Vec = - node.keys.iter().map(|k| format!("{:?}", k)).collect(); + let keys_as_strings: Vec = node.keys.iter().map(|k| format!("{k:?}")).collect(); format!("[L{}: {}]", node.level, keys_as_strings.join(", ")) }); - println!("Traversal: {}", traversal); + println!("Traversal: {traversal}"); // 6. Update the Value for an Existing Key tree.update(b"key1".to_vec(), b"new_value1".to_vec()); // 7. Find or Search for a Key if let Some(node) = tree.find(b"key1") { - println!("Found key1 with value: {:?}", node); + println!("Found key1 with value: {node:?}"); } else { println!("key1 not found"); } @@ -657,18 +656,17 @@ mod tests { // 5. Traverse the Tree with a Custom Formatter let traversal = tree.formatted_traverse(|node| { - let keys_as_strings: Vec = - node.keys.iter().map(|k| format!("{:?}", k)).collect(); + let keys_as_strings: Vec = node.keys.iter().map(|k| format!("{k:?}")).collect(); format!("[L{}: {}]", node.level, keys_as_strings.join(", ")) }); - println!("Traversal: {}", traversal); + println!("Traversal: {traversal}"); // 6. Update the Value for an Existing Key tree.update(b"key1".to_vec(), b"new_value1".to_vec()); // 7. Find or Search for a Key if let Some(node) = tree.find(b"key1") { - println!("Found key1 with value: {:?}", node); + println!("Found key1 with value: {node:?}"); } else { println!("key1 not found"); } @@ -775,8 +773,8 @@ mod tests { let traversal = tree.traverse(); // Convert byte arrays to their binary representation strings for comparison - let expected_key1 = format!("{:?}", key1); - let expected_key2 = format!("{:?}", key2); + let expected_key1 = format!("{key1:?}"); + let expected_key2 = format!("{key2:?}"); // Check if the traversal contains the expected keys assert!(traversal.contains(&expected_key1.to_string())); @@ -895,15 +893,14 @@ mod tests { for diff in &differences { match diff { DiffResult::Added(key, value) => { - println!("Added: key = {:?}, value = {:?}", key, value); + println!("Added: key = {key:?}, value = {value:?}"); } DiffResult::Removed(key, value) => { - println!("Removed: key = {:?}, value = {:?}", key, value); + println!("Removed: key = {key:?}, value = {value:?}"); } DiffResult::Modified(key, old_value, new_value) => { println!( - "Modified: key = {:?}, old_value = {:?}, new_value = {:?}", - key, old_value, new_value + "Modified: key = {key:?}, old_value = {old_value:?}, new_value = {new_value:?}" ); } }