From 670f00110631b84672bf7ada1139ca0101e6c9c5 Mon Sep 17 00:00:00 2001 From: yukirij Date: Sat, 19 Apr 2025 20:39:07 -0700 Subject: [PATCH] Update triefile to use persistent ids. --- src/bin/test.rs | 13 ++--- src/triefile/mod.rs | 123 +++++++++++++++++++++++++++++++------------- 2 files changed, 91 insertions(+), 45 deletions(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index c6959ee..fe77c6e 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -6,10 +6,7 @@ async fn main() { std::fs::create_dir_all("data").ok(); - //println!("BLOCKFILE"); - - let op = 0; - + /*let op = 0; if let Ok(mut bf) = BlockFile::<32>::open("data/cache_data.bin").await { match op { 0 => { @@ -27,10 +24,7 @@ async fn main() } } else { println!("Failed to open."); - } - - /* - println!("TRIEFILE"); + }*/ if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin").await { @@ -58,6 +52,7 @@ async fn main() println!(" = '{}'", String::from_utf8(data).unwrap()); } } + println!(""); } for i in tf.ids().await.unwrap() { @@ -70,10 +65,10 @@ async fn main() if let Ok(Some(data)) = tf.get(i).await { println!(" = '{}'", String::from_utf8(data).unwrap()); } + println!(""); } } else { println!("Failed to open index."); } - */ } diff --git a/src/triefile/mod.rs b/src/triefile/mod.rs index c5d7c91..5dad748 100644 --- a/src/triefile/mod.rs +++ b/src/triefile/mod.rs @@ -15,6 +15,14 @@ use tokio::{ io::{AsyncReadExt, AsyncSeekExt, SeekFrom, AsyncWriteExt}, }; +enum Traversal { + None, + Child(u32), + Next(u32,u32), +} + +const NO_REL :u32 = 0; + struct Node { length:u8, bytes:[u8; 15], @@ -113,7 +121,7 @@ impl TrieFile { let mut node_index = 0; let mut key_index = 0; - let mut parent_index = u32::MAX; + let mut traversal = Traversal::None; let mut result = 0; @@ -128,6 +136,11 @@ impl TrieFile { // Traverse nodes until key is found. while key_index < key.len() { //println!("start k {}/{}", key_index, key.len()); + let parent_index = match traversal { + Traversal::None => u32::MAX, + Traversal::Child(id) => id, + Traversal::Next(_, id) => id, + }; self.read_node(node_index, &mut node).await?; @@ -165,9 +178,9 @@ impl TrieFile { // Continue to child node. else { - if node.child != 0 { + if node.child != NO_REL { //println!(" - child"); - parent_index = node_index; + traversal = Traversal::Child(node_index); node_index = node.child; } @@ -184,64 +197,91 @@ impl TrieFile { } else { //println!(" - split"); - // Split node into one parent and two children, preserving parent block id. + // Split node into one parent and two children, preserving sequence/id mapping. key_index += prefix_index; let prefix = node.bytes[0..prefix_index].to_vec(); let suffix = node.bytes[prefix_index..].to_vec(); - node.length = prefix.len() as u8; - for i in prefix_index..node.bytes.len() { - node.bytes[i] = 0; + let new_index = self.allocate().await?; + let mut parent_node = Node::::new(); + + // Set parent/child/next (original parent updated later). + parent_node.parent = node.parent; + node.parent = new_index; + + parent_node.child = node_index; + + node.next = NO_REL; + + // Set length and text. + let mut bindex = 0; + node.length = suffix.len() as u8; + while bindex < suffix.len() { + node.bytes[bindex] = suffix[bindex]; + bindex += 1; + } + while bindex < node.bytes.len() { + node.bytes[bindex] = 0; + bindex += 1; } - let child_index = self.allocate().await?; - - let mut child_node = Node::::new(); - child_node.parent = node_index; - child_node.length = suffix.len() as u8; - for i in 0..15.min(suffix.len()) { - child_node.bytes[i] = suffix[i]; + bindex = 0; + parent_node.length = prefix.len() as u8; + while bindex < prefix.len() { + parent_node.bytes[bindex] = prefix[bindex]; + bindex += 1; + } + while bindex < node.bytes.len() { + parent_node.bytes[bindex] = 0; + bindex += 1; } - - child_node.child = node.child; - node.child = child_index; - - // Move data to child node. - child_node.has_data = node.has_data; - child_node.data = node.data; - node.has_data = false; - node.data.fill(0); // Write data to trailing nodes. if key_index < key.len() { - let new_branch = self.trailing_nodes(key, &mut key_index, data, node_index).await?; + let new_branch = self.trailing_nodes(key, &mut key_index, data, new_index).await?; result = new_branch as usize; - child_node.next = new_branch; + node.next = new_branch; } - // Write data to current node. else { - node.has_data = true; + parent_node.has_data = true; for i in 0..Z.min(data.len()) { - node.data[i] = data[i]; + parent_node.data[i] = data[i]; } - result = node_index as usize; + result = new_index as usize; } self.write_node(node_index, &node).await?; - self.write_node(child_index, &child_node).await?; + self.write_node(new_index, &parent_node).await?; + + + // Update original parent node to new node. + match traversal { + Traversal::None => { } + Traversal::Child(id) => { + self.read_node(id, &mut parent_node).await?; + parent_node.child = new_index; + self.write_node(id, &mut parent_node).await?; + } + Traversal::Next(id, _) => { + self.read_node(id, &mut parent_node).await?; + parent_node.next = new_index; + self.write_node(id, &mut parent_node).await?; + } + }; } } // If node does not share prefix with key... else { // Move to or create new next node. - if node.next != 0 { + if node.next != NO_REL { //println!(" - next"); // Move to next node. + traversal = Traversal::Next(node_index, parent_index); node_index = node.next; } else { //println!(" - new next"); @@ -299,7 +339,7 @@ impl TrieFile { // Continue to child node. else { - if node.child != 0 { + if node.child != NO_REL { //println!(" - child"); node_index = node.child; @@ -316,7 +356,7 @@ impl TrieFile { // If node does not share prefix with key... else { // Move to or create new next node. - if node.next != 0 { + if node.next != NO_REL { //println!(" - next"); // Move to next node. @@ -381,10 +421,21 @@ impl TrieFile { Ok(output) } - /*pub fn unset(&self, _key:&[u8]) -> Result<(), std::io::Error> + pub async fn unset(&mut self, key:&[u8]) -> Result<(), std::io::Error> { - Ok(()) - }*/ + let mut node = Node::::new(); + match self.find(key).await { + Ok(Some(id)) => { + self.read_node(id as u32, &mut node).await?; + node.has_data = false; + node.data = [0; Z]; + self.write_node(id as u32, &node).await?; + Ok(()) + } + Ok(None) => Ok(()), + Err(e) => Err(e), + } + } async fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8], parent:u32) -> Result {