Update triefile to use persistent ids.

This commit is contained in:
yukirij 2025-04-19 20:39:07 -07:00
parent 16c4707d6c
commit 670f001106
2 changed files with 91 additions and 45 deletions

View File

@ -6,10 +6,7 @@ async fn main()
{ {
std::fs::create_dir_all("data").ok(); std::fs::create_dir_all("data").ok();
//println!("BLOCKFILE"); /*let op = 0;
let op = 0;
if let Ok(mut bf) = BlockFile::<32>::open("data/cache_data.bin").await { if let Ok(mut bf) = BlockFile::<32>::open("data/cache_data.bin").await {
match op { match op {
0 => { 0 => {
@ -27,10 +24,7 @@ async fn main()
} }
} else { } else {
println!("Failed to open."); println!("Failed to open.");
} }*/
/*
println!("TRIEFILE");
if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin").await { if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin").await {
@ -58,6 +52,7 @@ async fn main()
println!(" = '{}'", String::from_utf8(data).unwrap()); println!(" = '{}'", String::from_utf8(data).unwrap());
} }
} }
println!("");
} }
for i in tf.ids().await.unwrap() { for i in tf.ids().await.unwrap() {
@ -70,10 +65,10 @@ async fn main()
if let Ok(Some(data)) = tf.get(i).await { if let Ok(Some(data)) = tf.get(i).await {
println!(" = '{}'", String::from_utf8(data).unwrap()); println!(" = '{}'", String::from_utf8(data).unwrap());
} }
println!("");
} }
} else { } else {
println!("Failed to open index."); println!("Failed to open index.");
} }
*/
} }

View File

@ -15,6 +15,14 @@ use tokio::{
io::{AsyncReadExt, AsyncSeekExt, SeekFrom, AsyncWriteExt}, io::{AsyncReadExt, AsyncSeekExt, SeekFrom, AsyncWriteExt},
}; };
// Records how the insert walk arrived at the node currently being examined,
// so that a node split can re-point the link that led to it.
enum Traversal {
// No link has been followed yet; the walk reports u32::MAX as the parent index.
None,
// Arrived through a `child` link. The value is the index of the node that was
// left, whose `child` field is rewritten when the node reached is split.
Child(u32),
// Arrived through a `next` link. The first value is the index of the node that
// was left (its `next` field is rewritten on a split); the second is the parent
// index that was in effect at the time and is carried forward unchanged.
Next(u32,u32),
}
// Link value meaning "no related node": a node's `child` or `next` field is
// only followed when it differs from this, and `next` is set to it to clear a link.
const NO_REL :u32 = 0;
struct Node<const Z:usize> { struct Node<const Z:usize> {
length:u8, length:u8,
bytes:[u8; 15], bytes:[u8; 15],
@ -113,7 +121,7 @@ impl<const Z:usize> TrieFile<Z> {
let mut node_index = 0; let mut node_index = 0;
let mut key_index = 0; let mut key_index = 0;
let mut parent_index = u32::MAX; let mut traversal = Traversal::None;
let mut result = 0; let mut result = 0;
@ -128,6 +136,11 @@ impl<const Z:usize> TrieFile<Z> {
// Traverse nodes until key is found. // Traverse nodes until key is found.
while key_index < key.len() { while key_index < key.len() {
//println!("start k {}/{}", key_index, key.len()); //println!("start k {}/{}", key_index, key.len());
let parent_index = match traversal {
Traversal::None => u32::MAX,
Traversal::Child(id) => id,
Traversal::Next(_, id) => id,
};
self.read_node(node_index, &mut node).await?; self.read_node(node_index, &mut node).await?;
@ -165,9 +178,9 @@ impl<const Z:usize> TrieFile<Z> {
// Continue to child node. // Continue to child node.
else { else {
if node.child != 0 { if node.child != NO_REL {
//println!(" - child"); //println!(" - child");
parent_index = node_index; traversal = Traversal::Child(node_index);
node_index = node.child; node_index = node.child;
} }
@ -184,64 +197,91 @@ impl<const Z:usize> TrieFile<Z> {
} else { } else {
//println!(" - split"); //println!(" - split");
// Split node into one parent and two children, preserving parent block id. // Split node into one parent and two children, preserving sequence/id mapping.
key_index += prefix_index; key_index += prefix_index;
let prefix = node.bytes[0..prefix_index].to_vec(); let prefix = node.bytes[0..prefix_index].to_vec();
let suffix = node.bytes[prefix_index..].to_vec(); let suffix = node.bytes[prefix_index..].to_vec();
node.length = prefix.len() as u8; let new_index = self.allocate().await?;
for i in prefix_index..node.bytes.len() { let mut parent_node = Node::<Z>::new();
node.bytes[i] = 0;
// Set parent/child/next (original parent updated later).
parent_node.parent = node.parent;
node.parent = new_index;
parent_node.child = node_index;
node.next = NO_REL;
// Set length and text.
let mut bindex = 0;
node.length = suffix.len() as u8;
while bindex < suffix.len() {
node.bytes[bindex] = suffix[bindex];
bindex += 1;
}
while bindex < node.bytes.len() {
node.bytes[bindex] = 0;
bindex += 1;
} }
let child_index = self.allocate().await?; bindex = 0;
parent_node.length = prefix.len() as u8;
let mut child_node = Node::<Z>::new(); while bindex < prefix.len() {
child_node.parent = node_index; parent_node.bytes[bindex] = prefix[bindex];
child_node.length = suffix.len() as u8; bindex += 1;
for i in 0..15.min(suffix.len()) { }
child_node.bytes[i] = suffix[i]; while bindex < node.bytes.len() {
parent_node.bytes[bindex] = 0;
bindex += 1;
} }
child_node.child = node.child;
node.child = child_index;
// Move data to child node.
child_node.has_data = node.has_data;
child_node.data = node.data;
node.has_data = false;
node.data.fill(0);
// Write data to trailing nodes. // Write data to trailing nodes.
if key_index < key.len() { if key_index < key.len() {
let new_branch = self.trailing_nodes(key, &mut key_index, data, node_index).await?; let new_branch = self.trailing_nodes(key, &mut key_index, data, new_index).await?;
result = new_branch as usize; result = new_branch as usize;
child_node.next = new_branch; node.next = new_branch;
} }
// Write data to current node. // Write data to current node.
else { else {
node.has_data = true; parent_node.has_data = true;
for i in 0..Z.min(data.len()) { for i in 0..Z.min(data.len()) {
node.data[i] = data[i]; parent_node.data[i] = data[i];
} }
result = node_index as usize; result = new_index as usize;
} }
self.write_node(node_index, &node).await?; self.write_node(node_index, &node).await?;
self.write_node(child_index, &child_node).await?; self.write_node(new_index, &parent_node).await?;
// Update original parent node to new node.
match traversal {
Traversal::None => { }
Traversal::Child(id) => {
self.read_node(id, &mut parent_node).await?;
parent_node.child = new_index;
self.write_node(id, &mut parent_node).await?;
}
Traversal::Next(id, _) => {
self.read_node(id, &mut parent_node).await?;
parent_node.next = new_index;
self.write_node(id, &mut parent_node).await?;
}
};
} }
} }
// If node does not share prefix with key... // If node does not share prefix with key...
else { else {
// Move to or create new next node. // Move to or create new next node.
if node.next != 0 { if node.next != NO_REL {
//println!(" - next"); //println!(" - next");
// Move to next node. // Move to next node.
traversal = Traversal::Next(node_index, parent_index);
node_index = node.next; node_index = node.next;
} else { } else {
//println!(" - new next"); //println!(" - new next");
@ -299,7 +339,7 @@ impl<const Z:usize> TrieFile<Z> {
// Continue to child node. // Continue to child node.
else { else {
if node.child != 0 { if node.child != NO_REL {
//println!(" - child"); //println!(" - child");
node_index = node.child; node_index = node.child;
@ -316,7 +356,7 @@ impl<const Z:usize> TrieFile<Z> {
// If node does not share prefix with key... // If node does not share prefix with key...
else { else {
// Move to or create new next node. // Move to or create new next node.
if node.next != 0 { if node.next != NO_REL {
//println!(" - next"); //println!(" - next");
// Move to next node. // Move to next node.
@ -381,10 +421,21 @@ impl<const Z:usize> TrieFile<Z> {
Ok(output) Ok(output)
} }
/*pub fn unset(&self, _key:&[u8]) -> Result<(), std::io::Error> pub async fn unset(&mut self, key:&[u8]) -> Result<(), std::io::Error>
{ {
Ok(()) let mut node = Node::<Z>::new();
}*/ match self.find(key).await {
Ok(Some(id)) => {
self.read_node(id as u32, &mut node).await?;
node.has_data = false;
node.data = [0; Z];
self.write_node(id as u32, &node).await?;
Ok(())
}
Ok(None) => Ok(()),
Err(e) => Err(e),
}
}
async fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8], parent:u32) -> Result<u32, std::io::Error> async fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8], parent:u32) -> Result<u32, std::io::Error>
{ {