Initialize repository; initial implementation of BlockFile.

This commit is contained in:
yukirij 2024-11-08 10:44:09 -08:00
commit 4ad9095296
6 changed files with 767 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
/data

97
Cargo.lock generated Normal file
View File

@ -0,0 +1,97 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
dependencies = [
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "pack"
version = "0.1.0"
source = "git+https://git.tsukiyo.org/Utility/pack#a59d38d67bd4962b286c685159161d8edadf422a"
dependencies = [
"num",
]
[[package]]
name = "storage"
version = "0.1.0"
dependencies = [
"pack",
]

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "storage"
version = "0.1.0"
edition = "2021"
[dependencies]
pack = { git = "https://git.tsukiyo.org/Utility/pack" }

17
src/bin/test.rs Normal file
View File

@ -0,0 +1,17 @@
use storage::BlockFile;
/// Smoke test: opens (or creates) `data/cache.bin` with 128-byte blocks,
/// inserts 760 text objects and prints each one back right after storing it.
fn main()
{
    let mut bf = match BlockFile::<128>::open("data/cache.bin") {
        Ok(bf) => bf,
        Err(_) => {
            println!("Failed to open.");
            return;
        }
    };

    for i in 0..760 {
        // The text is longer than one 128-byte block can hold.
        let text = format!("Hello, world! {}. This is long text to increase the block size to sufficient length to roll over into a second block when using smaller block sizes.", i);

        // Failed inserts are skipped without any output.
        if let Ok(id) = bf.insert(text.as_bytes()) {
            let bytes = bf.get(id).unwrap();
            let data = String::from_utf8(bytes).unwrap();
            println!("id {} = '{}'", id, data);
        }
    }
}

639
src/blockfile/mod.rs Normal file
View File

@ -0,0 +1,639 @@
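/*
** Multi-object block file storage with recursively paged headers.
**
** [Header:16]
** {AllocTable Head: <Depth:1> <Block:4> } (initial 1, 0)
** {ObjectTable Head: <Depth:1> <Block:4> } (initial 1, 2)
** {Next Object Id: <Id:4> } (initial 0)
** (remaining 2 bytes unused)
**
** [Allocation Table]
** Branch: { <Child Block:4> }* (0 = unallocated, MSB set = no vacancy)
** Leaf: bitmap with one bit per block (1 = occupied)
**
** [Object Table]
** Branch: { <Child Block:4> }* (0 = unallocated)
** Leaf: { <Pointer:4> <Length:4> }*
**
** [Data Block]
** <Data:Z-4> <Next:4>
*/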
use pack::prelude::*;
use std::{
fs::File,
io::{Read, Seek, SeekFrom, Write},
path::Path,
};
const HEADER_SIZE :usize = 16;
const F_NOVACANCY :u32 = 1 << 31;
/// Vacancy change that an allocation-table node reports to its parent
/// after an allocation pass.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Operation {
    /// The parent's cell needs no change.
    None,
    /// The child has free space again. No code in this file produces this
    /// variant yet.
    SetVacant,
    /// The child is full; the parent sets the no-vacancy flag on its cell.
    SetOccupied,
}
// Pairs a numeric result with a vacancy operation.
// NOTE(review): not referenced by any other code visible in this file;
// presumably intended as a richer return value for table traversal - confirm.
struct OperResult {
// Numeric result; its meaning is not established by the visible code.
pub result:usize,
// Vacancy change associated with the result.
pub operation:Operation,
}
/// Object store backed by a single file that is divided into fixed-size
/// blocks of `Z` bytes following a 16-byte header.
///
/// Objects are stored as chains of data blocks; each data block carries
/// `Z - 4` payload bytes followed by a 4-byte pointer to the next block.
#[derive(Debug)]
pub struct BlockFile<const Z:usize> {
    /// Handle to the backing file, opened for reading and writing.
    file:File,
}
impl<const Z:usize> BlockFile<Z> {
/// Opens the block file at `path`, creating and initializing it when the
/// path does not exist yet.
///
/// # Errors
/// Returns the underlying I/O error if the file cannot be opened or
/// created, or if writing the initial layout fails.
pub fn open<P:AsRef<Path>>(path:P) -> Result<Self, std::io::Error>
{
    let mut options = File::options();
    options.read(true).write(true);

    if path.as_ref().exists() {
        // Existing file: used as-is, its contents are not validated here.
        let file = options.open(path)?;
        Ok(Self { file })
    } else {
        // New file: `create_new` fails if the path appeared in the meantime.
        let file = options.create_new(true).open(path)?;
        Self { file }.init()
    }
}
/// Writes the initial layout of a freshly created file: the 16-byte header
/// followed by four blocks.
///
/// * block 0 - allocation table root (depth 1), cell 0 points at block 1
/// * block 1 - allocation bitmap with blocks 0..=3 marked occupied
/// * block 2 - object table root (depth 1), cell 0 points at block 3
/// * block 3 - first object table leaf; each cell's pointer field holds the
///   id that follows it, which `acquire_object` reads back as the next
///   object id
///
/// # Errors
/// Returns the I/O error if the layout cannot be written completely.
fn init(mut self) -> Result<Self, std::io::Error>
{
    let mut data = vec![0u8; HEADER_SIZE + (Z * 4)];

    // Header: allocation table depth 1 (root block bytes stay 0),
    // object table depth 1 with root block 2; next object id stays 0.
    data[0] = 1;
    data[5] = 1;
    data[6] = 2;

    // Block 0: allocation root, first cell refers to the bitmap in block 1.
    data[HEADER_SIZE] = 1;
    // Block 1: bitmap, low four bits mark blocks 0..=3 as occupied.
    data[HEADER_SIZE + Z] = 0x0F;
    // Block 2: object root, first cell refers to the leaf in block 3.
    data[HEADER_SIZE + (Z * 2)] = 3;

    // Block 3: object leaf, cell i stores i + 1 in its pointer field.
    for i in 0..Self::table_size() {
        let index = HEADER_SIZE + (Z * 3) + (i * 8);
        let pack_pointer = (1 + i as u32).pack();
        data[index..index + 4].copy_from_slice(&pack_pointer[..4]);
    }

    // `write` may stop after a partial write; the layout must be complete.
    self.file.write_all(&data)?;
    Ok(self)
}
/// Stores `data` as a new object and returns its object id.
///
/// The bytes are split into chunks of `Z - 4` bytes, one per data block;
/// every block except the last ends with the id of the following block.
/// At least one block is always allocated, but for empty `data` no block
/// is written.
///
/// # Errors
/// Returns the I/O error from allocation, object registration or writing.
pub fn insert(&mut self, data:&[u8]) -> Result<usize, std::io::Error>
{
    let payload = Self::data_size();

    // Number of blocks needed, rounding up.
    let block_count = data.len() / payload + usize::from(data.len() % payload != 0);

    // Reserve the blocks, then register the object at the first of them.
    let blocks = self.allocate(block_count.max(1))?;
    let id = self.acquire_object(blocks[0], data.len())?;

    // One reusable buffer for all blocks.
    let mut block_data = vec![0u8; Z];
    for (block_index, chunk) in data.chunks(payload).enumerate() {
        block_data.fill(0);
        block_data[..chunk.len()].copy_from_slice(chunk);

        // Link to the following block; the last block keeps a zero pointer.
        if block_index + 1 < block_count {
            let pack_next = blocks[block_index + 1].pack();
            block_data[Z - 4..].copy_from_slice(&pack_next[..4]);
        }

        self.write_block(blocks[block_index], &block_data)?;
    }

    Ok(id)
}
/*pub fn update(&mut self, _id:usize, _data:&[u8]) -> Result<(), std::io::Error>
{
}*/
/*pub fn remove(&mut self, _id:usize) -> Result<(), std::io::Error>
{
}*/
pub fn get(&self, id:usize) -> Result<Vec<u8>, std::io::Error>
{
// Get first block and data size
let (mut block_id, size) = self.get_object(id)?;
let mut data = Vec::new();
// Read blocks until size is full
while block_id != 0 {
let block = self.read_block(block_id)?;
let next_block = u32::unpack(&block, &mut (Z - 4)).unwrap_or_default();
//println!("size {} len {}", size, data.len());
let data_length = if next_block != 0 {
Z - 4
} else {
size - data.len()
};
data.extend_from_slice(&block[0..data_length]);
block_id = next_block;
}
Ok(data)
}
/// Reserves `count` blocks in the allocation table and returns their ids.
///
/// The depth and root block of the allocation table are read from the
/// first five bytes of the header, then the table is walked from the root.
///
/// # Errors
/// Returns any I/O error raised while reading the header or the table.
fn allocate(&mut self, count:usize) -> Result<Vec<u32>, std::io::Error>
{
    // Header bytes 0..5: <Depth:1> <Block:4>.
    let mut head = [0u8; 5];
    self.file.seek(SeekFrom::Start(0))?;
    self.file.read_exact(&mut head)?;

    let depth = head[0] as u32;
    let mut cursor = 1;
    let root_block = u32::unpack(&head, &mut cursor).unwrap_or_default();

    // Collect ids starting from the root table at basis 0.
    let mut blocks = Vec::new();
    self.allocate_traverse(root_block, depth, 0, true, count, &mut blocks)?;
    Ok(blocks)
}
/// Walks one node of the allocation table and appends newly reserved block
/// ids to `blocks` until it holds `count` entries.
///
/// * `block_id` - block holding this node
/// * `depth` - 0 for a bitmap leaf, greater than 0 for a branch table
/// * `basis` - value added to bit positions of a leaf to form block ids
/// * `is_root` - whether this node is the table root named in the header
///
/// Branch cells are 4 bytes: 0 means no child yet, a set most significant
/// bit (`F_NOVACANCY`) means the child is full. Leaves are bitmaps with one
/// bit per block.
///
/// Returns `Operation::SetOccupied` when this node has become full so the
/// parent can flag its cell; otherwise `Operation::None`.
///
/// # Panics
/// Panics if a cell cannot be unpacked or if a leaf yields a block id
/// below 4 (the blocks written by `init`), which indicates corruption.
///
/// NOTE(review): new leaves are placed at `end_block()`, while ids handed
/// out by a leaf are derived from `basis`; this appears to assume the file
/// length always matches the allocation state - confirm.
/// NOTE(review): `next_basis` multiplies by `pool_size()` at every level,
/// which looks wrong for depth >= 2 - confirm before relying on deep tables.
fn allocate_traverse(
    &mut self,
    block_id:u32,
    depth:u32,
    basis:u32,
    is_root:bool,
    count:usize,
    blocks:&mut Vec<u32>,
) -> Result<Operation, std::io::Error>
{
    let mut block = self.read_block(block_id)?;
    let mut write_block = false;
    let mut operation = Operation::None;

    if depth > 0 {
        // Branch table: visit cells in order, descending into every child
        // that is not flagged full and creating children on demand.
        let mut byte_index = 0;
        for cell_index in 0..Self::table_size() {
            let mut next_block_id :Option<u32> = None;
            let cell_byte_index = byte_index;
            let cell_data = u32::unpack(&block, &mut byte_index).expect("failed to unpack during alloc");

            if cell_data != 0 {
                if cell_data & F_NOVACANCY == 0 {
                    next_block_id = Some(cell_data);
                }
            } else {
                // Prepare leaf block; its first bit is pre-set.
                let leaf_block = self.end_block()?;
                let mut leaf_data = vec![0u8; Z];
                leaf_data[0] = 1;

                // Prepare intermediate depth tables, each pointing at the
                // previously prepared block through its first cell.
                let mut next_block = leaf_block;
                for level in 1..depth {
                    // Mark allocation on leaf table.
                    leaf_data[(level as usize) / 8] |= 1 << (level % 8);

                    let mut table_data = next_block.pack();
                    table_data.resize(Z, 0);
                    next_block += 1;
                    self.write_block(next_block, &table_data)?;
                }
                self.write_block(leaf_block, &leaf_data)?;
                next_block_id = Some(next_block);

                // Update cell with child table reference.
                let pack_child = next_block.pack();
                block[cell_byte_index..cell_byte_index + 4].copy_from_slice(&pack_child[..4]);
                write_block = true;

                // If root table allocated last page, generate new root
                // table at greater depth.
                if is_root && cell_index == Self::table_size() - 1 {
                    // Persist this table first: the nested allocation below
                    // re-reads it from the file and must see the new child,
                    // otherwise it would take this same path again.
                    self.write_block(block_id, &block)?;

                    let parent_blocks = self.allocate(1)?;
                    let new_root = parent_blocks[0];

                    // Update file header with the new root table and its
                    // depth, which is one level above this table.
                    self.file.seek(SeekFrom::Start(0))?;
                    self.file.write_all(&[
                        ((depth + 1) as u8).pack(),
                        new_root.pack(),
                    ].concat())?;

                    // Add current table as first element of the new table.
                    let mut table_data = vec![0u8; Z];
                    let packed_id = block_id.pack();
                    table_data[..4].copy_from_slice(&packed_id[..4]);
                    self.write_block(new_root, &table_data)?;

                    // Restart recursion with new root.
                    self.allocate_traverse(new_root, depth + 1, basis, true, count, blocks)?;
                    return Ok(Operation::None);
                }
            }

            // Search child table for blocks and update vacancy if requested.
            if let Some(next_block_id) = next_block_id {
                let next_basis = Self::table_cell_offset(depth - 1, cell_index as u32, basis) * Self::pool_size() as u32;
                match self.allocate_traverse(next_block_id, depth - 1, next_basis, false, count, blocks)? {
                    Operation::SetOccupied => {
                        write_block = true;
                        // Set the no-vacancy flag in the cell's last byte.
                        block[cell_byte_index + 3] |= 0x80;

                        // If last cell is marked occupied, this table is
                        // also occupied. Cells are numbered from 0, so the
                        // last one is `table_size() - 1`.
                        if cell_index == Self::table_size() - 1 {
                            operation = Operation::SetOccupied;
                        }
                    }
                    Operation::None | Operation::SetVacant => { }
                }
            }

            if blocks.len() == count { break; }
        }
    }
    else {
        // Bitmap leaf: claim unset bits, lowest first, until `count` ids
        // are collected or every byte is 0xFF.
        let mut byte_index = 0;
        while byte_index < block.len() && blocks.len() < count {
            if block[byte_index] != 0xFF {
                let bit = block[byte_index].trailing_ones();
                let id = basis + (byte_index * 8) as u32 + bit;

                // Catch most common corruption case.
                if id < 4 { panic!("invalid allocation (a < 4)"); }

                blocks.push(id);

                // Mark block as occupied.
                block[byte_index] |= 1 << bit;
                write_block = true;
            } else {
                byte_index += 1;
            }
        }

        // If table is fully allocated, signal parent to mark table as not vacant.
        if byte_index == block.len() && block[block.len() - 1] == 0xFF {
            operation = Operation::SetOccupied;
        }
    }

    if write_block {
        self.write_block(block_id, &block)?;
    }
    Ok(operation)
}
/// Registers a new object in the object table and returns its id.
///
/// The id is the "next object id" stored in the header. The object table
/// is grown by adding root levels until the id fits, missing tables on the
/// path to the id are allocated, and the leaf cell is set to
/// (`data_id`, `length`). The value previously held in the cell's pointer
/// field becomes the new "next object id" in the header.
///
/// * `data_id` - first data block of the object
/// * `length` - object length in bytes
///
/// # Errors
/// * `InvalidInput` if `length` does not fit the 4-byte length field,
/// * any I/O error from reading, allocating or writing.
fn acquire_object(&mut self, data_id:u32, length:usize) -> Result<usize, std::io::Error>
{
    // The length field of a cell is four bytes; refuse instead of
    // silently truncating.
    if length > u32::MAX as usize {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "object length exceeds 32-bit length field",
        ));
    }

    let mut b8 = [0u8; 1];
    let mut b32 = [0u8; 4];

    // Read object table depth, root block and next object id from the
    // header (bytes 5..14).
    self.file.seek(SeekFrom::Start(5))?;
    self.file.read_exact(&mut b8)?;
    let mut depth = b8[0] as u32;
    self.file.read_exact(&mut b32)?;
    let mut block_id = u32::unpack(&b32, &mut 0).unwrap_or_default();
    self.file.read_exact(&mut b32)?;
    let object_id = u32::unpack(&b32, &mut 0).unwrap_or_default() as usize;

    // Regenerate root table until the range includes object_id. A table
    // of this depth addresses ids 0..range, so an id equal to `range`
    // already needs another level.
    let mut range = Self::table_offset(depth + 1) as usize;
    let old_depth = depth;
    let mut root_data = vec![0u8; Z];
    while object_id >= range {
        let allocation = self.allocate(1)?;

        // The new root refers to the previous root through its first cell.
        let packed_id = block_id.pack();
        root_data[..4].copy_from_slice(&packed_id[..4]);

        block_id = allocation[0];
        depth += 1;
        self.write_block(block_id, &root_data)?;
        range = Self::table_offset(depth + 1) as usize;
    }

    // Update header pointer and depth if changed.
    if old_depth != depth {
        let pack_depth = [ depth as u8 ];
        let pack_pointer = block_id.pack();
        self.file.seek(SeekFrom::Start(5))?;
        self.file.write_all(&pack_depth)?;
        self.file.write_all(&pack_pointer)?;
    }

    // Descend through the branch tables (4-byte cells) to the leaf that
    // contains object_id, allocating missing tables on the way.
    let mut basis = 0;
    while depth > 0 {
        let mut block = self.read_block(block_id)?;
        let cell_index = (object_id - basis) / Self::table_offset(depth) as usize;
        let cell_start = cell_index * 4;
        let cell_data = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();

        let child_id = if cell_data == 0 {
            // Allocate new page if pointer is zero.
            let allocation = self.allocate(1)?;

            // Write new reference to table.
            let pack_block = allocation[0].pack();
            block[cell_start..cell_start + 4].copy_from_slice(&pack_block[..4]);

            // Populate child table. If the child is a leaf, every cell's
            // pointer field is preset to the id that follows it.
            let mut table_data = vec![0u8; Z];
            if depth == 1 {
                let pointer_basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) + 1;
                for i in 0..Self::table_size() {
                    let index = i * 8;
                    let pack_pointer = (pointer_basis + i as u32).pack();
                    table_data[index..index + 4].copy_from_slice(&pack_pointer[..4]);
                }
            }
            self.write_block(allocation[0], &table_data)?;
            self.write_block(block_id, &block)?;

            allocation[0]
        } else {
            cell_data
        };

        // Update frame of reference to child table.
        block_id = child_id;
        basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) as usize;
        depth -= 1;
    }

    // Leaf table (8-byte cells): remember the preset next-id pointer, then
    // overwrite the cell with data location and length.
    let mut block = self.read_block(block_id)?;
    let cell_index = (object_id - basis) / Self::table_offset(depth) as usize;
    let cell_start = cell_index * 8;
    let next_pointer = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();

    let pack_location = data_id.pack();
    block[cell_start..cell_start + 4].copy_from_slice(&pack_location[..4]);
    let pack_length = (length as u32).pack();
    block[cell_start + 4..cell_start + 8].copy_from_slice(&pack_length[..4]);

    // Update header with new next object id.
    let pack_pointer = next_pointer.pack();
    self.file.seek(SeekFrom::Start(10))?;
    self.file.write_all(&pack_pointer)?;

    self.write_block(block_id, &block)?;
    Ok(object_id)
}
/// Looks up object `id` in the object table and returns the id of its
/// first data block together with its length in bytes.
///
/// # Errors
/// * `NotFound` if a branch cell on the path to `id` is unallocated,
/// * any I/O error from reading the header or a table block.
fn get_object(&self, id:usize) -> Result<(u32, usize), std::io::Error>
{
    // `&File` implements `Read` and `Seek`, so the shared handle can be
    // used directly without duplicating the descriptor for every lookup.
    let mut file = &self.file;
    let mut b8 = [0u8; 1];
    let mut b32 = [0u8; 4];

    // Read object table depth and root block from the header (bytes 5..10).
    file.seek(SeekFrom::Start(5))?;
    file.read_exact(&mut b8)?;
    let mut depth = b8[0] as u32;
    file.read_exact(&mut b32)?;
    let mut block_id = u32::unpack(&b32, &mut 0).unwrap_or_default();

    // Select child tables containing `id` until depth is 0.
    let mut basis = 0;
    while depth > 0 {
        let block = self.read_block(block_id)?;
        let cell_index = (id - basis) / Self::table_offset(depth) as usize;
        let cell_start = cell_index * 4;
        let cell_data = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();

        // A zero cell means no table was ever allocated for this range.
        if cell_data == 0 {
            return Err(std::io::Error::new(std::io::ErrorKind::NotFound, "object id not valid"));
        }

        // Update frame of reference to child table.
        block_id = cell_data;
        basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) as usize;
        depth -= 1;
    }

    // Get object pointer and length from the 8-byte leaf cell.
    let block = self.read_block(block_id)?;
    let cell_index = (id - basis) / Self::table_offset(depth) as usize;
    let mut cell_start = cell_index * 8;
    let pointer = u32::unpack(&block, &mut cell_start).unwrap_or_default();
    let length = u32::unpack(&block, &mut cell_start).unwrap_or_default();

    Ok((pointer, length as usize))
}
/// Reads block `block_id` into a buffer of `Z` bytes.
///
/// Bytes that lie beyond the end of the file are returned as zero, so a
/// block that was never written reads as all zeroes.
///
/// # Errors
/// Returns any I/O error from seeking or reading.
fn read_block(&self, block_id:u32) -> Result<Vec<u8>, std::io::Error>
{
    // `&File` implements `Read` and `Seek`; no descriptor duplication needed.
    let mut file = &self.file;
    let mut data = vec![0u8; Z];

    // Offset is computed in u64 so it cannot overflow a 32-bit usize.
    let offset = HEADER_SIZE as u64 + (Z as u64 * block_id as u64);
    file.seek(SeekFrom::Start(offset))?;

    // A single `read` may return fewer bytes than requested even before the
    // end of the file; keep reading until the block is full or EOF is hit.
    let mut filled = 0;
    while filled < Z {
        match file.read(&mut data[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        }
    }

    Ok(data)
}
fn write_block(&mut self, block_id:u32, data:&[u8]) -> Result<(), std::io::Error>
{
self.file.seek(SeekFrom::Start((HEADER_SIZE + (Z * block_id as usize)) as u64))?;
self.file.write(&data[0..Z])?;
Ok(())
}
fn end_block(&self) -> Result<u32, std::io::Error>
{
let mut file = self.file.try_clone()?;
let index = file.seek(SeekFrom::End(0))? as usize;
Ok((1 + (index - HEADER_SIZE) / Z) as u32)
}
/// Returns `basis` advanced by `cell` strides, where one stride is
/// `table_offset(depth)`.
fn table_cell_offset(depth:u32, cell:u32, basis:u32) -> u32
{
    let stride = Self::table_offset(depth);
    basis + (cell * stride)
}
/// Returns `table_size()` raised to the power `depth`.
fn table_offset(depth:u32) -> u32
{
    let width = Self::table_size() as u32;
    width.pow(depth)
}
/// Payload bytes per data block: the block size minus the trailing 4-byte
/// next-block pointer.
const fn data_size() -> usize
{
    let pointer_bytes = 4;
    Z - pointer_bytes
}
/// Number of cells per table: the block size divided by 8.
const fn table_size() -> usize
{
    let cell_bytes = 8;
    Z / cell_bytes
}
/// Number of bits in one block, i.e. how many blocks a single bitmap leaf
/// can track.
const fn pool_size() -> usize
{
    let bits_per_byte = 8;
    Z * bits_per_byte
}
}

4
src/lib.rs Normal file
View File

@ -0,0 +1,4 @@
#![allow(dead_code)]
mod blockfile; pub use blockfile::BlockFile;
//mod triefile; pub use triefile::TrieFile;