Update triefile to use persistent ids.

This commit is contained in:
yukirij 2025-04-19 20:39:07 -07:00
parent 16c4707d6c
commit 670f001106
2 changed files with 91 additions and 45 deletions

View File

@ -6,10 +6,7 @@ async fn main()
{
std::fs::create_dir_all("data").ok();
//println!("BLOCKFILE");
let op = 0;
/*let op = 0;
if let Ok(mut bf) = BlockFile::<32>::open("data/cache_data.bin").await {
match op {
0 => {
@ -27,10 +24,7 @@ async fn main()
}
} else {
println!("Failed to open.");
}
/*
println!("TRIEFILE");
}*/
if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin").await {
@ -58,6 +52,7 @@ async fn main()
println!(" = '{}'", String::from_utf8(data).unwrap());
}
}
println!("");
}
for i in tf.ids().await.unwrap() {
@ -70,10 +65,10 @@ async fn main()
if let Ok(Some(data)) = tf.get(i).await {
println!(" = '{}'", String::from_utf8(data).unwrap());
}
println!("");
}
} else {
println!("Failed to open index.");
}
*/
}

View File

@ -15,6 +15,14 @@ use tokio::{
io::{AsyncReadExt, AsyncSeekExt, SeekFrom, AsyncWriteExt},
};
// Records which link was followed to reach the node currently being examined,
// so that link can be rewritten to point at a replacement node (as is done
// when a node is split and a new node takes its place).
enum Traversal {
// No link has been followed yet; this is the initial state of a walk.
None,
// Arrived through the `child` link of the node with this index.
Child(u32),
// Arrived through the `next` link of the node with the first index; the
// second value carries along the parent index that was in effect when the
// link was followed.
Next(u32,u32),
}
// Value of a node's `child`/`next` link that means "no related node"; links
// are compared against this before being followed and reset to it when cleared.
const NO_REL :u32 = 0;
struct Node<const Z:usize> {
length:u8,
bytes:[u8; 15],
@ -113,7 +121,7 @@ impl<const Z:usize> TrieFile<Z> {
let mut node_index = 0;
let mut key_index = 0;
let mut parent_index = u32::MAX;
let mut traversal = Traversal::None;
let mut result = 0;
@ -128,6 +136,11 @@ impl<const Z:usize> TrieFile<Z> {
// Traverse nodes until key is found.
while key_index < key.len() {
//println!("start k {}/{}", key_index, key.len());
let parent_index = match traversal {
Traversal::None => u32::MAX,
Traversal::Child(id) => id,
Traversal::Next(_, id) => id,
};
self.read_node(node_index, &mut node).await?;
@ -165,9 +178,9 @@ impl<const Z:usize> TrieFile<Z> {
// Continue to child node.
else {
if node.child != 0 {
if node.child != NO_REL {
//println!(" - child");
parent_index = node_index;
traversal = Traversal::Child(node_index);
node_index = node.child;
}
@ -184,64 +197,91 @@ impl<const Z:usize> TrieFile<Z> {
} else {
//println!(" - split");
// Split node into one parent and two children, preserving parent block id.
// Split node into one parent and two children, preserving sequence/id mapping.
key_index += prefix_index;
let prefix = node.bytes[0..prefix_index].to_vec();
let suffix = node.bytes[prefix_index..].to_vec();
node.length = prefix.len() as u8;
for i in prefix_index..node.bytes.len() {
node.bytes[i] = 0;
let new_index = self.allocate().await?;
let mut parent_node = Node::<Z>::new();
// Set parent/child/next (original parent updated later).
parent_node.parent = node.parent;
node.parent = new_index;
parent_node.child = node_index;
node.next = NO_REL;
// Set length and text.
let mut bindex = 0;
node.length = suffix.len() as u8;
while bindex < suffix.len() {
node.bytes[bindex] = suffix[bindex];
bindex += 1;
}
while bindex < node.bytes.len() {
node.bytes[bindex] = 0;
bindex += 1;
}
let child_index = self.allocate().await?;
let mut child_node = Node::<Z>::new();
child_node.parent = node_index;
child_node.length = suffix.len() as u8;
for i in 0..15.min(suffix.len()) {
child_node.bytes[i] = suffix[i];
bindex = 0;
parent_node.length = prefix.len() as u8;
while bindex < prefix.len() {
parent_node.bytes[bindex] = prefix[bindex];
bindex += 1;
}
while bindex < node.bytes.len() {
parent_node.bytes[bindex] = 0;
bindex += 1;
}
child_node.child = node.child;
node.child = child_index;
// Move data to child node.
child_node.has_data = node.has_data;
child_node.data = node.data;
node.has_data = false;
node.data.fill(0);
// Write data to trailing nodes.
if key_index < key.len() {
let new_branch = self.trailing_nodes(key, &mut key_index, data, node_index).await?;
let new_branch = self.trailing_nodes(key, &mut key_index, data, new_index).await?;
result = new_branch as usize;
child_node.next = new_branch;
node.next = new_branch;
}
// Write data to current node.
else {
node.has_data = true;
parent_node.has_data = true;
for i in 0..Z.min(data.len()) {
node.data[i] = data[i];
parent_node.data[i] = data[i];
}
result = node_index as usize;
result = new_index as usize;
}
self.write_node(node_index, &node).await?;
self.write_node(child_index, &child_node).await?;
self.write_node(new_index, &parent_node).await?;
// Update original parent node to new node.
match traversal {
Traversal::None => { }
Traversal::Child(id) => {
self.read_node(id, &mut parent_node).await?;
parent_node.child = new_index;
self.write_node(id, &mut parent_node).await?;
}
Traversal::Next(id, _) => {
self.read_node(id, &mut parent_node).await?;
parent_node.next = new_index;
self.write_node(id, &mut parent_node).await?;
}
};
}
}
// If node does not share prefix with key...
else {
// Move to or create new next node.
if node.next != 0 {
if node.next != NO_REL {
//println!(" - next");
// Move to next node.
traversal = Traversal::Next(node_index, parent_index);
node_index = node.next;
} else {
//println!(" - new next");
@ -299,7 +339,7 @@ impl<const Z:usize> TrieFile<Z> {
// Continue to child node.
else {
if node.child != 0 {
if node.child != NO_REL {
//println!(" - child");
node_index = node.child;
@ -316,7 +356,7 @@ impl<const Z:usize> TrieFile<Z> {
// If node does not share prefix with key...
else {
// Move to or create new next node.
if node.next != 0 {
if node.next != NO_REL {
//println!(" - next");
// Move to next node.
@ -381,10 +421,21 @@ impl<const Z:usize> TrieFile<Z> {
Ok(output)
}
/*pub fn unset(&self, _key:&[u8]) -> Result<(), std::io::Error>
pub async fn unset(&mut self, key:&[u8]) -> Result<(), std::io::Error>
{
let mut node = Node::<Z>::new();
match self.find(key).await {
Ok(Some(id)) => {
self.read_node(id as u32, &mut node).await?;
node.has_data = false;
node.data = [0; Z];
self.write_node(id as u32, &node).await?;
Ok(())
}*/
}
Ok(None) => Ok(()),
Err(e) => Err(e),
}
}
async fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8], parent:u32) -> Result<u32, std::io::Error>
{