Add ids, key, get to TrieFile; rename get to find.

This commit is contained in:
yukirij 2025-04-07 22:05:24 -07:00
parent adad61490b
commit 1d4344bc81
3 changed files with 114 additions and 22 deletions

View File

@ -5,6 +5,7 @@ fn main()
{
std::fs::create_dir_all("data").ok();
/*
if let Ok(mut bf) = BlockFile::<16>::open("data/cache_data.bin") {
if let Ok(id) = bf.insert("This is a test of the block file system.".as_bytes()) {
let data = String::from_utf8(bf.get(id).unwrap()).unwrap();
@ -13,8 +14,9 @@ fn main()
} else {
println!("Failed to open.");
}
*/
/*if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin") {
if let Ok(mut tf) = TrieFile::<8>::open("data/cache_index.bin") {
for s in [
"Hello",
@ -29,12 +31,32 @@ fn main()
println!("Failed to insert '{}'.", s);
}
if let Some(data) = tf.get(s.as_bytes()).unwrap() {
println!("found '{}'.", String::from_utf8(data).unwrap());
if let Ok(Some(id)) = tf.find(s.as_bytes()) {
println!("found '{}'.", id);
if let Ok(key) = tf.key(id) {
println!(" > '{}'", String::from_utf8(key).unwrap());
}
if let Ok(Some(data)) = tf.get(id) {
println!(" = '{}'", String::from_utf8(data).unwrap());
}
}
}
for i in tf.ids().unwrap() {
println!("id {}", i);
if let Ok(key) = tf.key(i) {
println!(" > '{}'", String::from_utf8(key).unwrap());
}
if let Ok(Some(data)) = tf.get(i) {
println!(" = '{}'", String::from_utf8(data).unwrap());
}
}
} else {
println!("Failed to open index.");
}*/
}
}

View File

@ -238,6 +238,17 @@ impl<const Z:usize> BlockFile<Z> {
Ok(data)
}
pub fn ids(&self) -> Result<Vec<usize>, std::io::Error>
// Intended to traverse the object list and return the allocated ids.
// Placeholder: no traversal is performed yet, so the result is always
// an empty list wrapped in Ok.
//
{
    Ok(Vec::new())
}
fn allocate(&mut self, count:usize) -> Result<Vec<u32>, std::io::Error>
// Mark as allocated and return the next available N blocks.
//

View File

@ -16,8 +16,9 @@ use std::{
};
struct Node<const Z:usize> {
length:usize,
length:u8,
bytes:[u8; 15],
parent:u32,
next:u32,
child:u32,
has_data:bool,
@ -29,6 +30,7 @@ impl<const Z:usize> Node<Z> {
Self {
length:0,
bytes:[0; 15],
parent:u32::MAX,
next:0,
child:0,
has_data:false,
@ -45,6 +47,7 @@ impl<const Z:usize> Node<Z> {
[
flags.pack(),
self.bytes.to_vec(),
self.parent.pack(),
self.next.pack(),
self.child.pack(),
self.data.to_vec(),
@ -55,7 +58,7 @@ impl<const Z:usize> Node<Z> {
{
if data.len() == Self::len() {
let flags = u8::unpack(data, index)?;
self.length = (flags & 0x0F) as usize;
self.length = (flags & 0x0F) as u8;
self.has_data = (flags & 0x80) != 0;
for i in 0..15 {
@ -63,6 +66,7 @@ impl<const Z:usize> Node<Z> {
*index += 1;
}
self.parent = u32::unpack(data, index)?;
self.next = u32::unpack(data, index)?;
self.child = u32::unpack(data, index)?;
@ -79,7 +83,7 @@ impl<const Z:usize> Node<Z> {
const fn len() -> usize
{
24 + Z
28 + Z
}
}
@ -109,11 +113,12 @@ impl<const Z:usize> TrieFile<Z> {
let mut node_index = 0;
let mut key_index = 0;
let mut parent_index = u32::MAX;
// Allocate first chain of nodes if none exist.
if self.block_count()? == 0 {
//println!("originate");
self.trailing_nodes(key, &mut key_index, data)?;
self.trailing_nodes(key, &mut key_index, data, u32::MAX)?;
return Ok(());
} else {
//println!("traverse");
@ -130,7 +135,7 @@ impl<const Z:usize> TrieFile<Z> {
// Count length of shared prefix
let mut prefix_index = 0;
while prefix_index < node.length
while prefix_index < node.length as usize
&& (key_index + prefix_index) < key.len()
&& key[key_index + prefix_index] == node.bytes[prefix_index] {
prefix_index += 1;
@ -138,7 +143,7 @@ impl<const Z:usize> TrieFile<Z> {
//println!("prefix {}", prefix_index);
if prefix_index == node.length {
if prefix_index == node.length as usize {
key_index += prefix_index;
// Copy data to node, mark as occupied, and write to file.
@ -159,7 +164,7 @@ impl<const Z:usize> TrieFile<Z> {
else {
if node.child != 0 {
//println!(" - child");
parent_index = node_index;
node_index = node.child;
}
@ -167,7 +172,7 @@ impl<const Z:usize> TrieFile<Z> {
else {
//println!(" - new child");
node.child = self.trailing_nodes(key, &mut key_index, data)?;
node.child = self.trailing_nodes(key, &mut key_index, data, node_index)?;
self.write_node(node_index, &node)?;
}
}
@ -182,8 +187,7 @@ impl<const Z:usize> TrieFile<Z> {
let prefix = node.bytes[0..prefix_index].to_vec();
let suffix = node.bytes[prefix_index..].to_vec();
//
node.length = prefix.len();
node.length = prefix.len() as u8;
for i in prefix_index..node.bytes.len() {
node.bytes[i] = 0;
}
@ -191,7 +195,8 @@ impl<const Z:usize> TrieFile<Z> {
let child_index = self.allocate()?;
let mut child_node = Node::<Z>::new();
child_node.length = suffix.len();
child_node.parent = node_index;
child_node.length = suffix.len() as u8;
for i in 0..15.min(suffix.len()) {
child_node.bytes[i] = suffix[i];
}
@ -207,7 +212,7 @@ impl<const Z:usize> TrieFile<Z> {
// Write data to trailing nodes.
if key_index < key.len() {
let new_branch = self.trailing_nodes(key, &mut key_index, data)?;
let new_branch = self.trailing_nodes(key, &mut key_index, data, node_index)?;
child_node.next = new_branch;
}
@ -236,7 +241,7 @@ impl<const Z:usize> TrieFile<Z> {
//println!(" - new next");
// Allocate and initialize subsequent nodes until key is resolved.
node.next = self.trailing_nodes(key, &mut key_index, data)?;
node.next = self.trailing_nodes(key, &mut key_index, data, parent_index)?;
self.write_node(node_index, &node)?;
}
}
@ -246,7 +251,7 @@ impl<const Z:usize> TrieFile<Z> {
Ok(())
}
pub fn get(&self, key:&[u8]) -> Result<Option<Vec<u8>>, std::io::Error>
pub fn find(&self, key:&[u8]) -> Result<Option<usize>, std::io::Error>
{
let mut node = Node::<Z>::new();
@ -267,7 +272,7 @@ impl<const Z:usize> TrieFile<Z> {
// Count length of shared prefix
let mut prefix_index = 0;
while prefix_index < node.length
while prefix_index < node.length as usize
&& (key_index + prefix_index) < key.len()
&& key[key_index + prefix_index] == node.bytes[prefix_index] {
prefix_index += 1;
@ -275,14 +280,14 @@ impl<const Z:usize> TrieFile<Z> {
//println!("prefix {}", prefix_index);
if prefix_index == node.length {
if prefix_index == node.length as usize {
key_index += prefix_index;
// Return node contents.
if key_index == key.len() {
//println!(" - found node");
return Ok(Some(node.data.to_vec()));
return Ok(Some(node_index as usize));
}
// Continue to child node.
@ -319,20 +324,74 @@ impl<const Z:usize> TrieFile<Z> {
Ok(None)
}
pub fn key(&self, id:usize) -> Result<Vec<u8>, std::io::Error>
// Rebuilds the full key that leads to node `id` by following parent links
// until the root marker (u32::MAX) is reached.
//
// Returns the key bytes in root-to-node order. An `id` equal to u32::MAX
// is treated as "no node" and yields an empty key.
//
// Errors: InvalidInput when `id` does not fit in a u32 node index; any
// error reported by read_node is passed through.
//
{
    // Node indices are u32. A plain `as` cast would wrap an oversized id
    // onto an unrelated node, so reject it explicitly instead.
    let mut node_id = <u32 as std::convert::TryFrom<usize>>::try_from(id).map_err(|_| {
        std::io::Error::new(std::io::ErrorKind::InvalidInput, "node id exceeds u32 range")
    })?;

    let mut bytes :Vec<u8> = Vec::new();
    let mut node = Node::<Z>::new();

    // Segments are visited leaf-first, so each one is appended back to front
    // and the whole buffer is flipped once at the end.
    while node_id != u32::MAX {
        self.read_node(node_id, &mut node)?;
        bytes.extend(node.bytes[..node.length as usize].iter().rev().copied());
        node_id = node.parent;
    }

    bytes.reverse();
    Ok(bytes)
}
pub fn get(&self, id:usize) -> Result<Option<Vec<u8>>, std::io::Error>
// Reads node `id` and returns a copy of its data when the node is flagged
// as holding data, or None when it is not.
//
// Errors: InvalidInput when `id` does not fit in a u32 node index; any
// error reported by read_node is passed through.
//
{
    // Node indices are u32. A plain `as` cast would wrap an oversized id
    // and return the data of an unrelated node, so reject it explicitly.
    let node_id = <u32 as std::convert::TryFrom<usize>>::try_from(id).map_err(|_| {
        std::io::Error::new(std::io::ErrorKind::InvalidInput, "node id exceeds u32 range")
    })?;

    let mut node = Node::<Z>::new();
    self.read_node(node_id, &mut node)?;

    Ok(if node.has_data {
        Some(node.data.to_vec())
    } else {
        None
    })
}
pub fn ids(&self) -> Result<Vec<usize>, std::io::Error>
// Scans every block index from 0 up to block_count() and returns, in
// ascending order, the indices of nodes flagged as holding data.
//
// Errors from block_count or read_node are passed through.
//
{
    let total = self.block_count()? as usize;
    let mut scratch = Node::<Z>::new();
    let mut found = Vec::new();

    let mut index = 0;
    while index < total {
        // The same scratch node is refilled on every iteration.
        self.read_node(index as u32, &mut scratch)?;
        if scratch.has_data {
            found.push(index);
        }
        index += 1;
    }

    Ok(found)
}
/*pub fn unset(&self, _key:&[u8]) -> Result<(), std::io::Error>
{
Ok(())
}*/
fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8]) -> Result<u32, std::io::Error>
fn trailing_nodes(&mut self, key:&[u8], key_index:&mut usize, data:&[u8], parent:u32) -> Result<u32, std::io::Error>
{
let starting_node = self.allocate()?;
let mut node :Node<Z>;
let mut node_index = starting_node;
let mut parent_node = parent;
while *key_index < key.len() {
node = Node::<Z>::new();
node.parent = parent_node;
parent_node = node_index;
// Copy key bytes to node.
for i in 0..15.min(key.len() - *key_index) {
node.bytes[i] = key[*key_index];