Skip to content

Commit 3cb107e

Browse files
authored
Improve tree balancing (#40)
* Refactor to improve probabilistic tree balancing * fix clippy
1 parent 402a150 commit 3cb107e

File tree

7 files changed

+163
-88
lines changed

7 files changed

+163
-88
lines changed

src/encoding.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ mod tests {
395395
// Convert the RecordBatch to a string for comparison
396396
let batch_string = record_batch_to_string(&batch);
397397
assert_eq!(batch.num_rows(), 2);
398-
println!("{}", batch_string);
398+
println!("{batch_string}");
399399
// Define the expected output
400400
let expected_output = r#"id: 1, 2
401401
uuid: guid-key1, guid-key2
@@ -474,7 +474,7 @@ name: name1, name2
474474
// Convert the RecordBatch to a string for comparison
475475
let batch_string = record_batch_to_string(&batch);
476476
assert_eq!(batch.num_rows(), 2);
477-
println!("{}", batch_string);
477+
println!("{batch_string}");
478478
// Define the expected output
479479
let expected_output = r#"id: 1, 2
480480
uuid: guid-key1, guid-key2

src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ limitations under the License.
2222
//! ## Features
2323
//!
2424
//! - **Verifiability**: The cryptographic hashing in Prolly Trees ensures data integrity and allows for
25-
//! verifiable proofs of inclusion/exclusion.
25+
//! verifiable proofs of inclusion/exclusion.
2626
//! - **Performance**: The balanced tree structure provides efficient data access patterns similar to
27-
//! B-trees, ensuring high performance for both random and sequential access.
27+
//! B-trees, ensuring high performance for both random and sequential access.
2828
//! - **Scalability**: Prolly Trees are suitable for large-scale applications, providing efficient index maintenance
29-
//! and data distribution capabilities.
29+
//! and data distribution capabilities.
3030
//! - **Flexibility**: The probabilistic balancing allows for handling various mutation patterns without degrading
31-
//! performance or structure.
31+
//! performance or structure.
3232
//!
3333
//! ## Usage
3434
//!

src/main.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ fn main() {
7979
"Proof for key \x1b[32m{:?}\x1b[0m in increasing order is valid: {}",
8080
keys[i], is_valid
8181
);
82-
println!("Proof: {:#?}", proof); // Assuming Debug trait is implemented
83-
// Sleep for 2 seconds
82+
println!("Proof: {proof:#?}"); // Assuming Debug trait is implemented
83+
// Sleep for 200 milliseconds
8484
sleep(Duration::from_millis(200));
8585
}
8686

@@ -119,8 +119,8 @@ fn main() {
119119
"Proof for key \x1b[32m{:?}\x1b[0m in reverse order is valid: {}",
120120
keys[i], is_valid
121121
);
122-
println!("Proof: {:#?}", proof); // Assuming Debug trait is implemented
123-
// Sleep for 2 seconds
122+
println!("Proof: {proof:#?}"); // Assuming Debug trait is implemented
123+
// Sleep for 200 milliseconds
124124
sleep(Duration::from_millis(200));
125125
}
126126
}

src/node.rs

Lines changed: 122 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -261,56 +261,29 @@ impl<const N: usize> ProllyNodeBuilder<N> {
261261
}
262262
}
263263

264-
impl<const N: usize> ProllyNode<N> {
265-
pub fn init_root(key: Vec<u8>, value: Vec<u8>) -> Self {
266-
ProllyNode {
267-
keys: vec![key],
268-
values: vec![value],
269-
is_leaf: true,
270-
level: INIT_LEVEL,
271-
..Default::default()
272-
}
273-
}
274-
275-
pub fn builder() -> ProllyNodeBuilder<N> {
276-
ProllyNodeBuilder::default()
277-
}
278-
279-
pub fn formatted_traverse_3<F>(&self, storage: &impl NodeStorage<N>, formatter: F) -> String
280-
where
281-
F: Fn(&ProllyNode<N>, &str, bool) -> String,
282-
{
283-
fn traverse_node<const N: usize, S: NodeStorage<N>, F>(
284-
node: &ProllyNode<N>,
285-
storage: &S,
286-
formatter: &F,
287-
prefix: &str,
288-
is_last: bool,
289-
output: &mut String,
290-
) where
291-
F: Fn(&ProllyNode<N>, &str, bool) -> String,
292-
{
293-
*output += &formatter(node, prefix, is_last);
264+
/// Trait for balancing nodes in the tree.
265+
/// This trait provides methods for splitting and merging nodes to maintain tree balance.
266+
trait Balanced<const N: usize> {
267+
/// Balances the node by splitting or merging it as needed.
268+
fn balance<S: NodeStorage<N>>(
269+
&mut self,
270+
storage: &mut S,
271+
is_root_node: bool,
272+
path_hashes: &[ValueDigest<N>],
273+
);
294274

295-
let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " });
296-
let children = node.children(storage);
297-
for (i, child) in children.iter().enumerate() {
298-
traverse_node(
299-
child,
300-
storage,
301-
formatter,
302-
&new_prefix,
303-
i == children.len() - 1,
304-
output,
305-
);
306-
}
307-
}
275+
/// Gets the hash of the next sibling of the node.
276+
fn get_next_sibling_hash<S: NodeStorage<N>>(
277+
&self,
278+
storage: &S,
279+
path_hashes: &[ValueDigest<N>],
280+
) -> Option<Vec<u8>>;
308281

309-
let mut output = String::new();
310-
traverse_node(self, storage, &formatter, "", true, &mut output);
311-
output
312-
}
282+
/// Merges the node with its next sibling.
283+
fn merge_with_next_sibling(&mut self, next_sibling: &mut ProllyNode<N>);
284+
}
313285

286+
impl<const N: usize> Balanced<N> for ProllyNode<N> {
314287
/// Attempts to balance the node by merging the next (right) neighbor
315288
/// and then splitting it into smaller nodes if necessary.
316289
fn balance<S: NodeStorage<N>>(
@@ -338,6 +311,9 @@ impl<const N: usize> ProllyNode<N> {
338311
}
339312

340313
// Use chunk_content to determine split points
314+
if self.keys.len() < self.min_chunk_size {
315+
return;
316+
}
341317
let chunks = self.chunk_content();
342318
if chunks.len() <= 1 {
343319
// do not need to split the node
@@ -465,8 +441,62 @@ impl<const N: usize> ProllyNode<N> {
465441
}
466442
}
467443

444+
impl<const N: usize> ProllyNode<N> {
445+
pub fn init_root(key: Vec<u8>, value: Vec<u8>) -> Self {
446+
ProllyNode {
447+
keys: vec![key],
448+
values: vec![value],
449+
is_leaf: true,
450+
level: INIT_LEVEL,
451+
..Default::default()
452+
}
453+
}
454+
455+
pub fn builder() -> ProllyNodeBuilder<N> {
456+
ProllyNodeBuilder::default()
457+
}
458+
459+
pub fn formatted_traverse_3<F>(&self, storage: &impl NodeStorage<N>, formatter: F) -> String
460+
where
461+
F: Fn(&ProllyNode<N>, &str, bool) -> String,
462+
{
463+
fn traverse_node<const N: usize, S: NodeStorage<N>, F>(
464+
node: &ProllyNode<N>,
465+
storage: &S,
466+
formatter: &F,
467+
prefix: &str,
468+
is_last: bool,
469+
output: &mut String,
470+
) where
471+
F: Fn(&ProllyNode<N>, &str, bool) -> String,
472+
{
473+
*output += &formatter(node, prefix, is_last);
474+
475+
let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " });
476+
let children = node.children(storage);
477+
for (i, child) in children.iter().enumerate() {
478+
traverse_node(
479+
child,
480+
storage,
481+
formatter,
482+
&new_prefix,
483+
i == children.len() - 1,
484+
output,
485+
);
486+
}
487+
}
488+
489+
let mut output = String::new();
490+
traverse_node(self, storage, &formatter, "", true, &mut output);
491+
output
492+
}
493+
}
494+
468495
impl<const N: usize> NodeChunk for ProllyNode<N> {
469496
fn chunk_content(&self) -> Vec<(usize, usize)> {
497+
if self.keys.len() < self.min_chunk_size {
498+
return Vec::new();
499+
}
470500
let mut chunks = Vec::new();
471501
let mut start = 0;
472502
let mut last_start = 0;
@@ -688,7 +718,7 @@ impl<const N: usize> Node<N> for ProllyNode<N> {
688718
}
689719
} else {
690720
// Handle the case when the child node is not found
691-
println!("Child node not found: {:?}", child_hash);
721+
println!("Child node not found: {child_hash:?}");
692722
}
693723

694724
// Sort the keys and balance the node
@@ -815,7 +845,7 @@ impl<const N: usize> Node<N> for ProllyNode<N> {
815845
true
816846
} else {
817847
// Handle the case when the child node is not found
818-
println!("Child node not found: {:?}", child_hash);
848+
println!("Child node not found: {child_hash:?}");
819849
false
820850
}
821851
}
@@ -869,7 +899,7 @@ impl<const N: usize> Node<N> for ProllyNode<N> {
869899
.iter()
870900
.map(|key| {
871901
key.iter()
872-
.map(|byte| format!("{:0}", byte))
902+
.map(|byte| format!("{byte:0}"))
873903
.collect::<Vec<String>>()
874904
.join(" ")
875905
})
@@ -886,16 +916,15 @@ impl<const N: usize> Node<N> for ProllyNode<N> {
886916
)
887917
} else {
888918
format!(
889-
"{}{}#({}\x1B[31m0x{:?}\x1B[0m)[{}]\n",
919+
"{}{}#({:?})[{}]\n",
890920
prefix,
891921
if is_last { "└── " } else { "├── " },
892-
"",
893922
hash,
894923
keys_str
895924
)
896925
}
897926
});
898-
println!("{}", output);
927+
println!("{output}");
899928
println!("Note: #[keys] indicates internal node, [keys] indicates leaf node");
900929
}
901930
}
@@ -962,6 +991,7 @@ impl<const N: usize> ProllyNode<N> {
962991
/// * `storage` - The storage implementation to retrieve child nodes.
963992
/// * `formatter` - A closure that takes a reference to a node and returns a string representation of the node.
964993
///
994+
///
965995
/// # Returns
966996
/// A string representation of the tree nodes in a breadth-first order.
967997
pub fn formatted_traverse<F>(&self, storage: &impl NodeStorage<N>, formatter: F) -> String
@@ -1471,4 +1501,43 @@ mod tests {
14711501
// Print chunk content
14721502
println!("{:?}", node.chunk_content());
14731503
}
1504+
1505+
/// Exercises balancing across a run of insertions.
///
/// Keys 0 through 10 are inserted into a node configured with a small chunk
/// size; afterwards the root must no longer be a leaf and every key must be
/// findable. One further key (11) is then inserted and all twelve keys are
/// looked up again.
#[test]
fn test_balance_after_insertions() {
    /// Asserts that every single-byte key in `0..=last_key` can be found.
    fn assert_keys_present(
        node: &ProllyNode<32>,
        storage: &InMemoryNodeStorage<32>,
        last_key: u8,
    ) {
        for i in 0..=last_key {
            assert!(node.find(&[i], storage).is_some());
        }
    }

    let mut storage = InMemoryNodeStorage::<32>::default();
    let shared_value = vec![100];

    // Small chunk bounds so that a handful of keys is enough to force splits.
    let mut node: ProllyNode<32> = ProllyNode::builder()
        .pattern(0b1)
        .min_chunk_size(4)
        .max_chunk_size(8)
        .build();

    // First batch: eleven keys, persisting the node after each insertion.
    for key in 0..=10 {
        node.insert(vec![key], shared_value.clone(), &mut storage, Vec::new());
        storage.insert_node(node.get_hash(), node.clone());
    }

    // With eleven keys the root is expected to have become an internal node.
    assert!(!node.is_leaf);
    assert_keys_present(&node, &storage, 10);

    // One more key, then make sure nothing was lost.
    node.insert(vec![11], shared_value.clone(), &mut storage, Vec::new());
    storage.insert_node(node.get_hash(), node.clone());

    assert_keys_present(&node, &storage, 11);
}
14741543
}

src/proof.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ impl<const N: usize> fmt::Debug for Proof<N> {
3434
.map(|digest| {
3535
let bytes = digest.as_bytes();
3636
if bytes.len() > 8 {
37-
format!("{:02x?}...", &bytes[..8])
37+
// Print only the first 8 bytes; the trailing "..." marks the truncation.
// A slice expression cannot be captured inline, so it stays positional.
format!("{:02x?}...", &bytes[..8])
3838
} else {
39-
format!("{:02x?}", bytes)
39+
format!("{bytes:02x?}")
4040
}
4141
})
4242
.collect::<Vec<_>>(),
@@ -46,9 +46,9 @@ impl<const N: usize> fmt::Debug for Proof<N> {
4646
&self.target_hash.as_ref().map(|digest| {
4747
let bytes = digest.as_bytes();
4848
if bytes.len() > 8 {
49-
format!("{:02x?}...", &bytes[..8])
49+
// Print only the first 8 bytes; the trailing "..." marks the truncation.
// A slice expression cannot be captured inline, so it stays positional.
format!("{:02x?}...", &bytes[..8])
5050
} else {
51-
format!("{:02x?}", bytes)
51+
format!("{bytes:02x?}")
5252
}
5353
}),
5454
)

src/storage.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ limitations under the License.
1515
use crate::digest::ValueDigest;
1616
use crate::node::ProllyNode;
1717
use std::collections::HashMap;
18-
use std::fmt;
18+
use std::fmt::{Display, Formatter, LowerHex};
1919
use std::fs::{self, File};
2020
use std::io::{Read, Write};
2121
use std::path::PathBuf;
@@ -123,18 +123,27 @@ impl<const N: usize> FileNodeStorage<N> {
123123
}
124124

125125
fn node_path(&self, hash: &ValueDigest<N>) -> PathBuf {
126-
self.storage_dir.join(format!("{:x}", hash))
126+
self.storage_dir.join(format!("{hash:x}"))
127127
}
128128

129129
fn config_path(&self, key: &str) -> PathBuf {
130-
self.storage_dir.join(format!("config_{}", key))
130+
self.storage_dir.join(format!("config_{key}"))
131131
}
132132
}
133133

134-
impl<const N: usize> fmt::LowerHex for ValueDigest<N> {
135-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
136-
for byte in &self.0 {
137-
write!(f, "{:02x}", byte)?;
134+
impl<const N: usize> Display for ValueDigest<N> {
135+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
136+
for byte in self.0 {
137+
write!(f, "{byte:02x}")?;
138+
}
139+
Ok(())
140+
}
141+
}
142+
143+
impl<const N: usize> LowerHex for ValueDigest<N> {
144+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
145+
for byte in self.0 {
146+
write!(f, "{byte:02x}")?;
138147
}
139148
Ok(())
140149
}

0 commit comments

Comments
 (0)