commit 4ad9095296b01650b16c838292209c2818220be1
Author: yukirij
Date:   Fri Nov 8 10:44:09 2024 -0800

    Initialize repository; initial implementation of BlockFile.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a727c0a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/data
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..81b2551
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,97 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+
+[[package]]
+name = "num"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
+dependencies = [
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "pack"
+version = "0.1.0"
+source = "git+https://git.tsukiyo.org/Utility/pack#a59d38d67bd4962b286c685159161d8edadf422a"
+dependencies = [
+ "num",
+]
+
+[[package]]
+name = "storage"
+version = "0.1.0"
+dependencies = [
+ "pack",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f89161c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "storage"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+
+pack = { git = "https://git.tsukiyo.org/Utility/pack" }
diff --git a/src/bin/test.rs b/src/bin/test.rs
new file mode 100644
index 0000000..b5e7585
--- /dev/null
+++ b/src/bin/test.rs
@@ -0,0 +1,17 @@
+use storage::BlockFile;
+
+fn main()
+{
+    if let Ok(mut bf) = BlockFile::<128>::open("data/cache.bin") {
+
+        for i in 0..760 {
+            if let Ok(id) = bf.insert(format!("Hello, world! {}. This is long text to increase the block size to sufficient length to roll over into a second block when using smaller block sizes.", i).as_bytes()) {
+
+                let data = String::from_utf8(bf.get(id).unwrap()).unwrap();
+                println!("id {} = '{}'", id, data);
+            }
+        }
+    } else {
+        println!("Failed to open.");
+    }
+}
diff --git a/src/blockfile/mod.rs b/src/blockfile/mod.rs
new file mode 100644
index 0000000..55c3d12
--- /dev/null
+++ b/src/blockfile/mod.rs
@@ -0,0 +1,633 @@
+/*
+** Multi-object block file storage with recursively paged headers.
+**
+** [Header:16]
+**   {AllocTable Head: <depth:1> <root block:4>}   (initial 1, 0)
+**   {ObjectTable Head: <depth:1> <root block:4>}  (initial 1, 2)
+**   {Next Object Id: <id:4>}                      (initial 0)
+**   {Unused: 2}
+**
+** [Allocation Table]
+**   depth > 0: {<child block:4>}*  (MSB set when the child has no vacancy)
+**   depth = 0: bitmap, one bit per block (set = allocated)
+**
+** [Object Table]
+**   depth > 0: {<child block:4>}*
+**   depth = 0: {<first data block:4> <length:4>}*
+**              (an unused cell holds the id of the next free object)
+**
+** [Data Block]
+**   <data:Z-4> <next block:4>  (next block is 0 in the last block)
+**
+** Block N is stored at byte offset HEADER_SIZE + (Z * N).
+*/
+
+use pack::prelude::*;
+
+use std::{
+    fs::File,
+    io::{Read, Seek, SeekFrom, Write},
+    path::Path,
+};
+
+// Size in bytes of the file header that precedes block 0.
+const HEADER_SIZE :usize = 16;
+
+// Allocation table cell flag: the referenced child has no vacancy.
+const F_NOVACANCY :u32 = 1 << 31;
+
+// Vacancy change a table reports to its parent after an allocation pass.
+enum Operation {
+    None,
+    SetVacant,
+    SetOccupied,
+}
+
+struct OperResult {
+    pub result:usize,
+    pub operation:Operation,
+}
+
+/// Object store that keeps byte strings in fixed-size blocks of one file.
+///
+/// `Z` is the block size in bytes. Objects are addressed by the id that
+/// [`BlockFile::insert`] returns.
+pub struct BlockFile<const Z:usize> {
+    file:File,
+}
+impl<const Z:usize> BlockFile<Z> {
+    /// Opens the block file at `path`, creating and initializing it when it
+    /// does not exist yet.
+    ///
+    /// # Errors
+    /// Returns any I/O error raised while opening, creating or initializing
+    /// the file.
+    pub fn open<P:AsRef<Path>>(path:P) -> Result<Self, std::io::Error>
+    {
+        if path.as_ref().exists() {
+
+            // Open existing file.
+            let file = File::options()
+                .read(true)
+                .write(true)
+                .open(path)?;
+            Ok(Self { file })
+        } else {
+
+            // Create and initialize new file.
+            let file = File::options()
+                .create_new(true)
+                .read(true)
+                .write(true)
+                .open(path)?;
+            Self { file }.init()
+        }
+    }
+
+    // Writes the header and the first four blocks of a new file: block 0 is
+    // the allocation table root, block 1 its first bitmap, block 2 the object
+    // table root and block 3 the first object table leaf.
+    fn init(mut self) -> Result<Self, std::io::Error>
+    {
+        let mut data = vec![0u8; HEADER_SIZE + (Z * 4)];
+        data[0] = 1;                      // allocation table depth
+        data[5] = 1;                      // object table depth
+        data[6] = 2;                      // object table root block
+        data[HEADER_SIZE] = 1;            // allocation root -> bitmap in block 1
+        data[HEADER_SIZE + Z] = 0x0F;     // blocks 0 to 3 are allocated
+        data[HEADER_SIZE + (Z * 2)] = 3;  // object root -> leaf in block 3
+
+        // Chain every cell of the first object leaf to the next object id.
+        for i in 0..Self::table_size() {
+            Self::put_u32(&mut data, HEADER_SIZE + (Z * 3) + (i * 8), 1 + i as u32);
+        }
+
+        // write_all: a bare write may store only part of the buffer.
+        self.file.write_all(&data)?;
+        Ok(self)
+    }
+
+    /// Stores `data` as a new object and returns its id.
+    ///
+    /// The data is split over chained blocks carrying `Z - 4` bytes each; an
+    /// empty object still reserves one block.
+    ///
+    /// # Errors
+    /// Returns `InvalidInput` when `data` is longer than `u32::MAX` bytes
+    /// (lengths are stored in 32 bits), an error when the allocator yields
+    /// fewer blocks than requested, and any underlying I/O error.
+    pub fn insert(&mut self, data:&[u8]) -> Result<usize, std::io::Error>
+    {
+        if u32::try_from(data.len()).is_err() {
+            return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "object too large"));
+        }
+
+        // Allocate storage blocks
+        let block_count = (data.len() / Self::data_size()) + ((data.len() % Self::data_size()) != 0) as usize;
+        let request = block_count.max(1);
+
+        let blocks = self.allocate(request)?;
+        if blocks.len() < request {
+            return Err(std::io::Error::new(std::io::ErrorKind::Other, "block allocation failed"));
+        }
+
+        // Get object id
+        let id = self.acquire_object(blocks[0], data.len())?;
+
+        let mut block_data = vec![0u8; Z];
+
+        // Write data to storage blocks
+        for (block_index, chunk) in data.chunks(Self::data_size()).enumerate() {
+            block_data.fill(0);
+
+            // Copy data slice to buffer
+            block_data[..chunk.len()].copy_from_slice(chunk);
+
+            // Write pointer to next block to end of buffer
+            if block_index + 1 < block_count {
+                Self::put_u32(&mut block_data, Z - 4, blocks[block_index + 1]);
+            }
+
+            self.write_block(blocks[block_index], &block_data)?;
+        }
+
+        Ok(id)
+    }
+
+    /*pub fn update(&mut self, _id:usize, _data:&[u8]) -> Result<(), std::io::Error>
+    {
+
+    }*/
+
+    /*pub fn remove(&mut self, _id:usize) -> Result<(), std::io::Error>
+    {
+
+    }*/
+
+    /// Returns the data of the object stored under `id`.
+    ///
+    /// # Errors
+    /// Returns `NotFound` when no object table page covers `id`, and any
+    /// underlying I/O error.
+    pub fn get(&self, id:usize) -> Result<Vec<u8>, std::io::Error>
+    {
+        // Get first block and data size
+        let (mut block_id, size) = self.get_object(id)?;
+        let mut data = Vec::new();
+
+        // Read blocks until size is full
+        while block_id != 0 {
+            let block = self.read_block(block_id)?;
+            let next_block = u32::unpack(&block, &mut (Z - 4)).unwrap_or_default();
+
+            // The last block holds what remains of the recorded size; clamp
+            // it so a corrupt length cannot underflow or overrun the block.
+            let data_length = if next_block != 0 {
+                Self::data_size()
+            } else {
+                size.saturating_sub(data.len()).min(Self::data_size())
+            };
+
+            data.extend_from_slice(&block[0..data_length]);
+
+            block_id = next_block;
+        }
+
+        Ok(data)
+    }
+
+    // Allocates `count` blocks and returns their ids.
+    fn allocate(&mut self, count:usize) -> Result<Vec<u32>, std::io::Error>
+    {
+        let mut b8 = [0u8; 1];
+        let mut b32 = [0u8; 4];
+
+        // Read allocation table root block and depth from file.
+        self.file.seek(SeekFrom::Start(0))?;
+        self.file.read_exact(&mut b8)?;
+        self.file.read_exact(&mut b32)?;
+
+        let root_block = u32::unpack(&b32, &mut 0).unwrap_or_default();
+
+        let mut blocks = Vec::new();
+
+        // Acquire next available block in table.
+        self.allocate_traverse(
+            root_block,
+            b8[0] as u32,
+            0,
+            true,
+            count,
+            &mut blocks,
+        )?;
+
+        Ok(blocks)
+    }
+
+    // Walks the allocation table stored in `block_id` and appends newly
+    // allocated block ids to `blocks` until it holds `count` entries.
+    //
+    // `depth` is the table level (0 is a bitmap) and `basis` the id of the
+    // first block the table covers. Returns `SetOccupied` when the table has
+    // no vacancy left, so that the parent can flag the cell pointing to it.
+    fn allocate_traverse(
+        &mut self,
+        block_id:u32,
+        depth:u32,
+        basis:u32,
+        is_root:bool,
+        count:usize,
+        blocks:&mut Vec<u32>,
+    ) -> Result<Operation, std::io::Error>
+    {
+        let mut block = self.read_block(block_id)?;
+        let mut write_block = false;
+
+        let mut operation = Operation::None;
+
+        // Search table for first vacant or unallocated child.
+        if depth > 0 {
+
+            /* Check each cell for vacancy.
+            ** Vacant cells have a MSB of 0.
+            ** Unallocated cells must be 0.
+            */
+            let mut byte_index = 0;
+            for cell_index in 0..Self::table_size() {
+                let mut next_block_id :Option<u32> = None;
+
+                let cell_byte_index = byte_index;
+                let cell_data = u32::unpack(&block, &mut byte_index).expect("failed to unpack during alloc");
+
+                if cell_data != 0 {
+                    if cell_data & F_NOVACANCY == 0 {
+                        next_block_id = Some(cell_data);
+                    }
+                } else {
+                    // Prepare leaf block. It is placed on the first block of
+                    // the pool it describes, which is the block its first
+                    // bit records; the end of the file is not a reliable
+                    // position because allocated blocks may be unwritten.
+                    let leaf_block = Self::pool_basis(depth, cell_index as u32, basis);
+                    let mut leaf_data = vec![0u8; Z];
+                    leaf_data[0] = 1;
+
+                    // Prepare intermediate depth tables.
+                    let mut next_block = leaf_block;
+                    for level in 1..depth {
+                        // Mark allocation on leaf table.
+                        leaf_data[(level as usize) / 8] |= 1 << (level % 8);
+
+                        // Prepare table data
+                        let mut table_data = next_block.pack();
+                        table_data.resize(Z, 0);
+
+                        next_block += 1;
+                        self.write_block(next_block, &table_data)?;
+                    }
+
+                    self.write_block(leaf_block, &leaf_data)?;
+
+                    next_block_id = Some(next_block);
+
+                    // Update cell with child table reference.
+                    Self::put_u32(&mut block, cell_byte_index, next_block);
+                    write_block = true;
+
+                    // If root table allocated last page, generate new root table at greater depth.
+                    if is_root && cell_index == Self::table_size() - 1 {
+
+                        // Update current table first; the nested allocation
+                        // re-reads it from the file and must see the new page.
+                        self.write_block(block_id, &block)?;
+
+                        let parent_blocks = self.allocate(1)?;
+
+                        // Update file header with new root table and depth.
+                        // The new root sits one level above this table.
+                        self.file.seek(SeekFrom::Start(0))?;
+                        self.file.write_all(&[
+                            ((depth + 1) as u8).pack(),
+                            parent_blocks[0].pack(),
+                        ].concat())?;
+
+                        // Add current table to first element of new table.
+                        let mut table_data = vec![0u8; Z];
+                        Self::put_u32(&mut table_data, 0, block_id);
+                        self.write_block(parent_blocks[0], &table_data)?;
+
+                        // Restart recursion with new root.
+                        self.allocate_traverse(parent_blocks[0], depth + 1, basis, true, count, blocks)?;
+                        return Ok(Operation::None);
+                    }
+                }
+
+                // Search child table for blocks and update vacancy if requested.
+                if let Some(next_block_id) = next_block_id {
+                    let next_basis = Self::pool_basis(depth, cell_index as u32, basis);
+                    let result = self.allocate_traverse(next_block_id, depth - 1, next_basis, false, count, blocks)?;
+
+                    if matches!(result, Operation::SetOccupied) {
+                        write_block = true;
+
+                        block[cell_byte_index + 3] |= 0x80;
+
+                        // If last cell is marked occupied, this table is also occupied.
+                        if cell_index == Self::table_size() - 1 {
+                            operation = Operation::SetOccupied;
+                        }
+                    }
+                }
+
+                if blocks.len() == count { break; }
+            }
+        }
+
+        // Find first cell (byte) with unset bit.
+        else {
+            /* ASSUMPTION
+            ** This procedure should not be reachable unless table has vacancy.
+            */
+
+            // Find first unset bit in block.
+            let mut byte_index = 0;
+            while byte_index < block.len() && blocks.len() < count {
+                if block[byte_index] != 0xFF {
+                    let bit = block[byte_index].trailing_ones();
+                    let id = basis + (byte_index * 8) as u32 + bit;
+
+                    // Catch most common corruption case.
+                    if id < 4 {
+                        return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid allocation (a < 4)"));
+                    }
+
+                    // Add block id to output.
+                    blocks.push(id);
+
+                    // Mark block as occupied.
+                    block[byte_index] |= 1 << bit;
+                    write_block = true;
+                } else {
+                    byte_index += 1;
+                }
+            }
+
+            // If table is fully allocated, signal parent to mark table as not vacant.
+            if block.iter().all(|&byte| byte == 0xFF) {
+                operation = Operation::SetOccupied;
+            }
+        }
+
+        if write_block {
+            self.write_block(block_id, &block)?;
+        }
+
+        Ok(operation)
+    }
+
+    // Claims the next free object id, stores `data_id` and `length` in its
+    // object table cell and returns the id. The table gains a new root level
+    // whenever the id lies outside the range the current root covers.
+    fn acquire_object(&mut self, data_id:u32, length:usize) -> Result<usize, std::io::Error>
+    {
+        let mut b8 = [0u8; 1];
+        let mut b32 = [0u8; 4];
+
+        // Read object table depth, root block and next object id from file.
+        self.file.seek(SeekFrom::Start(5))?;
+        self.file.read_exact(&mut b8)?;
+        let mut depth = b8[0] as u32;
+
+        self.file.read_exact(&mut b32)?;
+        let mut block_id = u32::unpack(&b32, &mut 0).unwrap_or_default();
+
+        self.file.read_exact(&mut b32)?;
+        let object_id = u32::unpack(&b32, &mut 0).unwrap_or_default() as usize;
+
+        // Regenerate root header until range includes object_id. A root of
+        // this depth covers ids below `range`, so an id equal to `range`
+        // already needs another level.
+        let mut range = Self::table_offset(depth + 1) as usize;
+        let old_depth = depth;
+
+        let mut root_data = vec![0u8; Z];
+        while object_id >= range {
+            let allocation = self.allocate(1)?;
+
+            Self::put_u32(&mut root_data, 0, block_id);
+
+            block_id = allocation[0];
+            depth += 1;
+
+            self.write_block(block_id, &root_data)?;
+
+            range = Self::table_offset(depth + 1) as usize;
+        }
+
+        // Update header pointer and depth if changed.
+        if old_depth != depth {
+            let pack_depth = [ depth as u8 ];
+            let pack_pointer = block_id.pack();
+
+            self.file.seek(SeekFrom::Start(5))?;
+            self.file.write_all(&pack_depth)?;
+            self.file.write_all(&pack_pointer)?;
+        }
+
+        let mut basis = 0;
+
+        // Select child tables containing object_id until depth is 0.
+        while depth > 0 {
+            let mut block = self.read_block(block_id)?;
+
+            let cell_index = (object_id - basis) / Self::table_offset(depth) as usize;
+            let cell_start = cell_index * 4;
+            let cell_data = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();
+
+            // Allocate new page if pointer is zero.
+            let child_id = if cell_data == 0 {
+                let allocation = self.allocate(1)?;
+
+                // Write new reference to table.
+                Self::put_u32(&mut block, cell_start, allocation[0]);
+                self.write_block(block_id, &block)?;
+
+                // Populate child table.
+                let mut table_data = vec![0u8; Z];
+
+                // If child is leaf, populate next cell pointers.
+                if depth == 1 {
+                    let pointer_basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) + 1;
+
+                    for i in 0..Self::table_size() {
+                        Self::put_u32(&mut table_data, i * 8, pointer_basis + i as u32);
+                    }
+                }
+
+                self.write_block(allocation[0], &table_data)?;
+
+                allocation[0]
+            } else {
+                cell_data
+            };
+
+            // Update frame of reference to child table.
+            block_id = child_id;
+            basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) as usize;
+            depth -= 1;
+        }
+
+        // Update block and header with object information.
+        let mut block = self.read_block(block_id)?;
+
+        let cell_index = (object_id - basis) / Self::table_offset(depth) as usize;
+        let cell_start = cell_index * 8;
+        let next_pointer = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();
+
+        // Update cell with data location and length.
+        Self::put_u32(&mut block, cell_start, data_id);
+        Self::put_u32(&mut block, cell_start + 4, length as u32);
+
+        // Update header with new pointer.
+        let pack_pointer = next_pointer.pack();
+        self.file.seek(SeekFrom::Start(10))?;
+        self.file.write_all(&pack_pointer)?;
+
+        self.write_block(block_id, &block)?;
+
+        Ok(object_id)
+    }
+
+    // Looks up the first data block and stored length of object `id`.
+    // NOTE(review): an id that was never inserted but lies inside an existing
+    // leaf is not rejected; its cell still holds the next-free-id pointer.
+    fn get_object(&self, id:usize) -> Result<(u32, usize), std::io::Error>
+    {
+        let mut file = self.file.try_clone()?;
+
+        let mut b8 = [0u8; 1];
+        let mut b32 = [0u8; 4];
+
+        // Read object table depth and root block from file.
+        file.seek(SeekFrom::Start(5))?;
+        file.read_exact(&mut b8)?;
+        let mut depth = b8[0] as u32;
+
+        file.read_exact(&mut b32)?;
+        let mut block_id = u32::unpack(&b32, &mut 0).unwrap_or_default();
+
+        let mut basis = 0;
+
+        // Select child tables containing id until depth is 0.
+        while depth > 0 {
+            let block = self.read_block(block_id)?;
+
+            let cell_index = (id - basis) / Self::table_offset(depth) as usize;
+            let cell_start = cell_index * 4;
+            let child_id = u32::unpack(&block, &mut cell_start.clone()).unwrap_or_default();
+
+            if child_id == 0 {
+                return Err(std::io::Error::new(std::io::ErrorKind::NotFound, "object id not valid"));
+            }
+
+            // Update frame of reference to child table.
+            block_id = child_id;
+            basis = Self::table_cell_offset(depth, cell_index as u32, basis as u32) as usize;
+            depth -= 1;
+        }
+
+        // Get object pointer and length from cell.
+        let block = self.read_block(block_id)?;
+
+        let cell_index = (id - basis) / Self::table_offset(depth) as usize;
+        let mut cell_start = cell_index * 8;
+        let pointer = u32::unpack(&block, &mut cell_start).unwrap_or_default();
+        let length = u32::unpack(&block, &mut cell_start).unwrap_or_default();
+
+        Ok((pointer, length as usize))
+    }
+
+    // Reads block `block_id`. Bytes beyond the end of the file read as zero,
+    // so a block that is allocated but not yet written appears empty.
+    fn read_block(&self, block_id:u32) -> Result<Vec<u8>, std::io::Error>
+    {
+        let mut file = self.file.try_clone()?;
+
+        let mut data = vec![0u8; Z];
+        file.seek(SeekFrom::Start((HEADER_SIZE + (Z * block_id as usize)) as u64))?;
+
+        // One read may return fewer bytes than requested before the end of
+        // the file, so keep reading until the block is full or EOF is hit.
+        let mut filled = 0;
+        while filled < Z {
+            let count = file.read(&mut data[filled..])?;
+            if count == 0 { break; }
+            filled += count;
+        }
+
+        Ok(data)
+    }
+
+    // Writes the first `Z` bytes of `data` to block `block_id`.
+    fn write_block(&mut self, block_id:u32, data:&[u8]) -> Result<(), std::io::Error>
+    {
+        self.file.seek(SeekFrom::Start((HEADER_SIZE + (Z * block_id as usize)) as u64))?;
+        self.file.write_all(&data[0..Z])?;
+
+        Ok(())
+    }
+
+    // Returns one more than the number of whole blocks the file holds.
+    // NOTE(review): not called by the allocator, which places new tables by
+    // pool basis; confirm the intent of the extra 1 before reusing this.
+    fn end_block(&self) -> Result<u32, std::io::Error>
+    {
+        let mut file = self.file.try_clone()?;
+        let index = file.seek(SeekFrom::End(0))? as usize;
+        Ok((1 + (index - HEADER_SIZE) / Z) as u32)
+    }
+
+    // Stores the packed form of `value` in `buffer[index..index + 4]`.
+    fn put_u32(buffer:&mut [u8], index:usize, value:u32)
+    {
+        buffer[index..index + 4].copy_from_slice(&value.pack()[0..4]);
+    }
+
+    // Returns the id of the first block covered by cell `cell` of an
+    // allocation table at `depth` (> 0) whose own first block is `basis`.
+    fn pool_basis(depth:u32, cell:u32, basis:u32) -> u32
+    {
+        basis + (Self::table_offset(depth - 1) * cell * Self::pool_size() as u32)
+    }
+
+    // Returns the first id covered by cell `cell` of an object table at
+    // `depth` whose own first id is `basis`.
+    fn table_cell_offset(depth:u32, cell:u32, basis:u32) -> u32
+    {
+        (Self::table_offset(depth) * cell) + basis
+    }
+
+    // Returns the number of ids covered by one cell of a table at `depth`.
+    fn table_offset(depth:u32) -> u32
+    {
+        (Self::table_size() as u32).pow(depth)
+    }
+
+    // Payload bytes per data block; the last four hold the next pointer.
+    const fn data_size() -> usize
+    {
+        Z - 4
+    }
+
+    // Cells per table page.
+    const fn table_size() -> usize
+    {
+        Z / 8
+    }
+
+    // Blocks tracked by one allocation bitmap page.
+    const fn pool_size() -> usize
+    {
+        Z * 8
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..fa1f756
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,4 @@
+#![allow(dead_code)]
+
+mod blockfile; pub use blockfile::BlockFile;
+//mod triefile; pub use triefile::TrieFile;