diff --git a/src/digest.rs b/src/digest.rs index aa5bca4..f81e300 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -130,9 +130,11 @@ impl<'de, const N: usize> Deserialize<'de> for ValueDigest { where D: serde::Deserializer<'de>, { - let bytes: &[u8] = serde::de::Deserialize::deserialize(deserializer)?; - let array = <[u8; N]>::try_from(bytes) - .map_err(|_| serde::de::Error::invalid_length(bytes.len(), &stringify!(N)))?; + // Try to deserialize as a sequence of bytes (for JSON format) + let bytes: Vec = serde::de::Deserialize::deserialize(deserializer)?; + let array = <[u8; N]>::try_from(bytes.as_slice()).map_err(|_| { + serde::de::Error::invalid_length(bytes.len(), &format!("array of length {N}").as_str()) + })?; Ok(ValueDigest(array)) } } diff --git a/src/git/operations.rs b/src/git/operations.rs index b73e902..d182263 100644 --- a/src/git/operations.rs +++ b/src/git/operations.rs @@ -397,14 +397,24 @@ mod tests { #[test] fn test_git_operations_creation() { let temp_dir = TempDir::new().unwrap(); - let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + // Initialize git repository (regular, not bare) + gix::init(temp_dir.path()).unwrap(); + // Create subdirectory for dataset + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let store = VersionedKvStore::<32>::init(&dataset_dir).unwrap(); let _ops = GitOperations::new(store); } #[test] fn test_parse_commit_id() { let temp_dir = TempDir::new().unwrap(); - let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + // Initialize git repository (regular, not bare) + gix::init(temp_dir.path()).unwrap(); + // Create subdirectory for dataset + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let store = VersionedKvStore::<32>::init(&dataset_dir).unwrap(); let ops = GitOperations::new(store); // Test HEAD parsing diff --git a/src/git/storage.rs b/src/git/storage.rs index 004d874..daed189 
100644 --- a/src/git/storage.rs +++ b/src/git/storage.rs @@ -33,34 +33,69 @@ pub struct GitNodeStorage { configs: Mutex>>, // Maps ProllyTree hashes to Git object IDs hash_to_object_id: Mutex, gix::ObjectId>>, + // Directory where this dataset's config and mapping files are stored + dataset_dir: std::path::PathBuf, +} + +impl Clone for GitNodeStorage { + fn clone(&self) -> Self { + let cloned = Self { + _repository: self._repository.clone(), + cache: Mutex::new(LruCache::new(NonZeroUsize::new(1000).unwrap())), + configs: Mutex::new(HashMap::new()), + hash_to_object_id: Mutex::new(HashMap::new()), + dataset_dir: self.dataset_dir.clone(), + }; + + // Load the hash mappings for the cloned instance + cloned.load_hash_mappings(); + + cloned + } } impl GitNodeStorage { /// Create a new GitNodeStorage instance - pub fn new(repository: gix::Repository) -> Result { + pub fn new( + repository: gix::Repository, + dataset_dir: std::path::PathBuf, + ) -> Result { let cache_size = NonZeroUsize::new(1000).unwrap(); // Default cache size - Ok(GitNodeStorage { + let storage = GitNodeStorage { _repository: Arc::new(Mutex::new(repository)), cache: Mutex::new(LruCache::new(cache_size)), configs: Mutex::new(HashMap::new()), hash_to_object_id: Mutex::new(HashMap::new()), - }) + dataset_dir, + }; + + // Load existing hash mappings + storage.load_hash_mappings(); + + Ok(storage) } /// Create GitNodeStorage with custom cache size pub fn with_cache_size( repository: gix::Repository, + dataset_dir: std::path::PathBuf, cache_size: usize, ) -> Result { let cache_size = NonZeroUsize::new(cache_size).unwrap_or(NonZeroUsize::new(1000).unwrap()); - Ok(GitNodeStorage { + let storage = GitNodeStorage { _repository: Arc::new(Mutex::new(repository)), cache: Mutex::new(LruCache::new(cache_size)), configs: Mutex::new(HashMap::new()), hash_to_object_id: Mutex::new(HashMap::new()), - }) + dataset_dir, + }; + + // Load existing hash mappings + storage.load_hash_mappings(); + + Ok(storage) } /// Store a 
node as a Git blob @@ -118,7 +153,14 @@ impl NodeStorage for GitNodeStorage { match self.store_node_as_blob(&node) { Ok(blob_id) => { // Store the mapping between ProllyTree hash and Git object ID - self.hash_to_object_id.lock().unwrap().insert(hash, blob_id); + self.hash_to_object_id + .lock() + .unwrap() + .insert(hash.clone(), blob_id); + + // Persist the mapping to filesystem + self.save_hash_mapping(&hash, &blob_id); + Some(()) } Err(_) => None, @@ -139,14 +181,90 @@ impl NodeStorage for GitNodeStorage { } fn save_config(&self, key: &str, config: &[u8]) { - // Store config in memory for now - // In a real implementation, we'd store this as a Git blob or in a config file + // Store config in memory let mut configs = self.configs.lock().unwrap(); configs.insert(key.to_string(), config.to_vec()); + + // Also persist to filesystem for durability in the dataset directory + let config_path = self.dataset_dir.join(format!("prolly_config_{key}")); + let _ = std::fs::write(config_path, config); } fn get_config(&self, key: &str) -> Option> { - self.configs.lock().unwrap().get(key).cloned() + // First try to get from memory + if let Some(config) = self.configs.lock().unwrap().get(key).cloned() { + return Some(config); + } + + // If not in memory, try to load from filesystem + let config_path = self.dataset_dir.join(format!("prolly_config_{key}")); + if let Ok(config) = std::fs::read(config_path) { + // Cache in memory for future use + self.configs + .lock() + .unwrap() + .insert(key.to_string(), config.clone()); + return Some(config); + } + + None + } +} + +impl GitNodeStorage { + /// Save hash mapping to filesystem + fn save_hash_mapping(&self, hash: &ValueDigest, object_id: &gix::ObjectId) { + let mapping_path = self.dataset_dir.join("prolly_hash_mappings"); + + // Read existing mappings + let mut mappings = if mapping_path.exists() { + std::fs::read_to_string(&mapping_path).unwrap_or_default() + } else { + String::new() + }; + + // Add new mapping - use simple format 
for now without hex dependency + let hash_bytes: Vec = hash.0.iter().map(|b| format!("{b:02x}")).collect(); + let hash_hex = hash_bytes.join(""); + let object_hex = object_id.to_hex().to_string(); + mappings.push_str(&format!("{hash_hex}:{object_hex}\n")); + + // Write back + let _ = std::fs::write(mapping_path, mappings); + } + + /// Load hash mappings from filesystem + fn load_hash_mappings(&self) { + let mapping_path = self.dataset_dir.join("prolly_hash_mappings"); + + if let Ok(mappings) = std::fs::read_to_string(mapping_path) { + let mut hash_map = self.hash_to_object_id.lock().unwrap(); + + for line in mappings.lines() { + if let Some((hash_hex, object_hex)) = line.split_once(':') { + // Parse hex string manually + if hash_hex.len() == N * 2 { + let mut hash_bytes = Vec::new(); + for i in 0..N { + if let Ok(byte) = u8::from_str_radix(&hash_hex[i * 2..i * 2 + 2], 16) { + hash_bytes.push(byte); + } else { + break; + } + } + + if hash_bytes.len() == N { + if let Ok(object_id) = gix::ObjectId::from_hex(object_hex.as_bytes()) { + let mut hash_array = [0u8; N]; + hash_array.copy_from_slice(&hash_bytes); + let hash = ValueDigest(hash_array); + hash_map.insert(hash, object_id); + } + } + } + } + } + } } } @@ -186,8 +304,8 @@ mod tests { #[test] fn test_git_node_storage_basic_operations() { - let (_temp_dir, repo) = create_test_repo(); - let mut storage = GitNodeStorage::<32>::new(repo).unwrap(); + let (temp_dir, repo) = create_test_repo(); + let mut storage = GitNodeStorage::<32>::new(repo, temp_dir.path().to_path_buf()).unwrap(); let node = create_test_node(); let hash = node.get_hash(); @@ -210,8 +328,10 @@ mod tests { #[test] fn test_cache_functionality() { - let (_temp_dir, repo) = create_test_repo(); - let mut storage = GitNodeStorage::<32>::with_cache_size(repo, 2).unwrap(); + let (temp_dir, repo) = create_test_repo(); + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let mut storage = 
GitNodeStorage::<32>::with_cache_size(repo, dataset_dir, 2).unwrap(); let node1 = create_test_node(); let hash1 = node1.get_hash(); diff --git a/src/git/versioned_store.rs b/src/git/versioned_store.rs index 43a516e..8608352 100644 --- a/src/git/versioned_store.rs +++ b/src/git/versioned_store.rs @@ -17,6 +17,7 @@ use crate::git::storage::GitNodeStorage; use crate::git::types::*; use crate::tree::{ProllyTree, Tree}; use gix::prelude::*; +use std::collections::HashMap; use std::path::Path; /// A versioned key-value store backed by Git and ProllyTree @@ -27,20 +28,67 @@ use std::path::Path; pub struct VersionedKvStore { tree: ProllyTree>, git_repo: gix::Repository, - staging_area: std::collections::HashMap, Option>>, // None = deleted + staging_area: HashMap, Option>>, // None = deleted current_branch: String, } impl VersionedKvStore { + /// Find the git repository root by walking up the directory tree + fn find_git_root>(start_path: P) -> Option { + let mut current = start_path.as_ref().to_path_buf(); + loop { + if current.join(".git").exists() { + return Some(current); + } + if !current.pop() { + break; + } + } + None + } + + /// Check if we're running in the git repository root directory + fn is_in_git_root>(path: P) -> Result { + let path = path + .as_ref() + .canonicalize() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to resolve path: {e}")))?; + + if let Some(git_root) = Self::find_git_root(&path) { + let git_root = git_root.canonicalize().map_err(|e| { + GitKvError::GitObjectError(format!("Failed to resolve git root: {e}")) + })?; + Ok(path == git_root) + } else { + Err(GitKvError::GitObjectError( + "Not inside a git repository. 
Please run from within a git repository.".to_string(), + )) + } + } + /// Initialize a new versioned KV store at the given path pub fn init>(path: P) -> Result { let path = path.as_ref(); - // Initialize Git repository - let git_repo = gix::init(path).map_err(|e| GitKvError::GitInitError(Box::new(e)))?; + // Reject if trying to initialize in git root directory + if Self::is_in_git_root(path)? { + return Err(GitKvError::GitObjectError( + "Cannot initialize git-prolly in git root directory. Please run from a subdirectory to create a dataset.".to_string() + )); + } + + // Find the git repository + let git_root = Self::find_git_root(path).ok_or_else(|| { + GitKvError::GitObjectError( + "Not inside a git repository. Please run from within a git repository.".to_string(), + ) + })?; + + // Open the existing git repository instead of initializing a new one + let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?; - // Create GitNodeStorage - let storage = GitNodeStorage::new(git_repo.clone())?; + // Create GitNodeStorage with the current directory as dataset directory + let storage = GitNodeStorage::new(git_repo.clone(), path.to_path_buf())?; // Create ProllyTree with default config let config: TreeConfig = TreeConfig::default(); @@ -49,13 +97,19 @@ impl VersionedKvStore { let mut store = VersionedKvStore { tree, git_repo, - staging_area: std::collections::HashMap::new(), + staging_area: HashMap::new(), current_branch: "main".to_string(), }; + // Save initial configuration + let _ = store.tree.save_config(); + // Create initial commit store.commit("Initial commit")?; + // Auto-commit prolly metadata files after initialization + store.commit_prolly_metadata(" after init")?; + Ok(store) } @@ -63,15 +117,32 @@ impl VersionedKvStore { pub fn open>(path: P) -> Result { let path = path.as_ref(); + // Reject if trying to open in git root directory + if Self::is_in_git_root(path)? 
{ + return Err(GitKvError::GitObjectError( + "Cannot run git-prolly in git root directory. Please run from a subdirectory containing a dataset.".to_string() + )); + } + + // Find the git repository + let git_root = Self::find_git_root(path).ok_or_else(|| { + GitKvError::GitObjectError( + "Not inside a git repository. Please run from within a git repository.".to_string(), + ) + })?; + // Open existing Git repository - let git_repo = gix::open(path).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?; + let git_repo = gix::open(&git_root).map_err(|e| GitKvError::GitOpenError(Box::new(e)))?; - // Create GitNodeStorage - let storage = GitNodeStorage::new(git_repo.clone())?; + // Create GitNodeStorage with the current directory as dataset directory + let storage = GitNodeStorage::new(git_repo.clone(), path.to_path_buf())?; - // Load tree configuration (using default for now) - let config: TreeConfig = TreeConfig::default(); - let tree = ProllyTree::new(storage, config); + // Load tree configuration from storage + let config: TreeConfig = ProllyTree::load_config(&storage).unwrap_or_default(); + + // Try to load existing tree from storage, or create new one + let tree = ProllyTree::load_from_storage(storage.clone(), config.clone()) + .unwrap_or_else(|| ProllyTree::new(storage, config)); // Get current branch let current_branch = git_repo @@ -80,17 +151,26 @@ impl VersionedKvStore { .map(|r| r.name().shorten().to_string()) .unwrap_or_else(|| "main".to_string()); - Ok(VersionedKvStore { + let mut store = VersionedKvStore { tree, git_repo, - staging_area: std::collections::HashMap::new(), + staging_area: HashMap::new(), current_branch, - }) + }; + + // Load staging area from file if it exists + store.load_staging_area()?; + + // Reload the tree from the current HEAD + store.reload_tree_from_head()?; + + Ok(store) } /// Insert a key-value pair (stages the change) pub fn insert(&mut self, key: Vec, value: Vec) -> Result<(), GitKvError> { self.staging_area.insert(key, 
Some(value)); + self.save_staging_area()?; Ok(()) } @@ -99,6 +179,7 @@ impl VersionedKvStore { let exists = self.get(&key).is_some(); if exists { self.staging_area.insert(key, Some(value)); + self.save_staging_area()?; } Ok(exists) } @@ -108,6 +189,7 @@ impl VersionedKvStore { let exists = self.get(key).is_some(); if exists { self.staging_area.insert(key.to_vec(), None); + self.save_staging_area()?; } Ok(exists) } @@ -133,7 +215,12 @@ impl VersionedKvStore { pub fn list_keys(&self) -> Vec> { let mut keys = std::collections::HashSet::new(); - // Add keys from staging area + // Add keys from the committed ProllyTree + for key in self.tree.collect_keys() { + keys.insert(key); + } + + // Add keys from staging area (overrides committed data) for (key, value) in &self.staging_area { if value.is_some() { keys.insert(key.clone()); @@ -142,10 +229,6 @@ impl VersionedKvStore { } } - // Add keys from committed data (if not in staging) - // This is a simplified implementation - // In reality, we'd need to traverse the ProllyTree properly - keys.into_iter().collect() } @@ -184,6 +267,14 @@ impl VersionedKvStore { } } + // Persist the tree state + self.tree.persist_root(); + + // Save the updated configuration with the new root hash + self.tree + .save_config() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to save config: {e}")))?; + // Create tree object in Git let tree_id = self.create_git_tree()?; @@ -193,6 +284,12 @@ impl VersionedKvStore { // Update HEAD self.update_head(commit_id)?; + // Clear staging area file since we've committed + self.save_staging_area()?; + + // Auto-commit prolly metadata files to git + self.commit_prolly_metadata(&format!(" after commit: {message}"))?; + Ok(commit_id) } @@ -220,6 +317,7 @@ impl VersionedKvStore { pub fn checkout(&mut self, branch_or_commit: &str) -> Result<(), GitKvError> { // Clear staging area self.staging_area.clear(); + self.save_staging_area()?; // Update HEAD to point to the new branch/commit let target_ref = if 
branch_or_commit.starts_with("refs/") { @@ -306,15 +404,10 @@ impl VersionedKvStore { /// Create a Git tree object from the current ProllyTree state fn create_git_tree(&self) -> Result { - // For now, create a simple tree with a placeholder entry - // In a real implementation, this would serialize the ProllyTree root - // and create a proper Git tree structure - - let tree_entries = vec![gix::objs::tree::Entry { - mode: gix::objs::tree::EntryMode(0o100644), - filename: "prolly_tree_root".into(), - oid: gix::ObjectId::null(gix::hash::Kind::Sha1), // Placeholder - }]; + // Create an empty tree - the ProllyTree state is managed through GitNodeStorage + // We don't need to create a prolly_tree_root file since the tree structure + // is stored in Git blobs and managed through the NodeStorage interface + let tree_entries = vec![]; let tree = gix::objs::Tree { entries: tree_entries, @@ -329,6 +422,23 @@ impl VersionedKvStore { Ok(tree_id) } + /// Get git user configuration (name and email) + fn get_git_user_config(&self) -> Result<(String, String), GitKvError> { + let config = self.git_repo.config_snapshot(); + + let name = config + .string("user.name") + .map(|n| n.to_string()) + .unwrap_or_else(|| "git-prolly".to_string()); + + let email = config + .string("user.email") + .map(|e| e.to_string()) + .unwrap_or_else(|| "git-prolly@example.com".to_string()); + + Ok((name, email)) + } + /// Create a Git commit object fn create_git_commit( &self, @@ -341,10 +451,13 @@ impl VersionedKvStore { .unwrap() .as_secs() as i64; + // Get git user configuration + let (name, email) = self.get_git_user_config()?; + // Create author and committer signatures let signature = gix::actor::Signature { - name: "git-prolly".into(), - email: "git-prolly@example.com".into(), + name: name.into(), + email: email.into(), time: gix::date::Time { seconds: now, offset: 0, @@ -379,23 +492,174 @@ impl VersionedKvStore { } /// Update HEAD to point to the new commit - fn update_head(&mut self, _commit_id: 
gix::ObjectId) -> Result<(), GitKvError> { + fn update_head(&mut self, commit_id: gix::ObjectId) -> Result<(), GitKvError> { // Update the current branch reference to point to the new commit - let _branch_ref = format!("refs/heads/{}", self.current_branch); + let branch_ref = format!("refs/heads/{}", self.current_branch); - // Note: This is a simplified implementation - // A full implementation would use gix reference transactions to properly update - // the branch reference to point to the new commit + // For now, use a simple implementation that writes directly to the file + let refs_dir = self.git_repo.path().join("refs").join("heads"); + std::fs::create_dir_all(&refs_dir).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to create refs directory: {e}")) + })?; + + let branch_file = refs_dir.join(&self.current_branch); + std::fs::write(&branch_file, commit_id.to_hex().to_string()).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to write branch reference: {e}")) + })?; + + // Update HEAD to point to the branch + let head_file = self.git_repo.path().join("HEAD"); + std::fs::write(&head_file, format!("ref: {branch_ref}\n")).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to write HEAD reference: {e}")) + })?; Ok(()) } /// Reload the ProllyTree from the current HEAD fn reload_tree_from_head(&mut self) -> Result<(), GitKvError> { - // This is a simplified implementation - // In reality, we'd need to reconstruct the ProllyTree from Git objects + // Since we're no longer storing prolly_tree_root in the Git tree, + // we need to reload the tree state from the GitNodeStorage + + // Load tree configuration from storage + let config: TreeConfig = ProllyTree::load_config(&self.tree.storage).unwrap_or_default(); + + // Try to load existing tree from storage, or create new one + let storage = self.tree.storage.clone(); + self.tree = ProllyTree::load_from_storage(storage.clone(), config.clone()) + .unwrap_or_else(|| ProllyTree::new(storage, 
config)); + + Ok(()) + } + + /// Save the staging area to a file + fn save_staging_area(&self) -> Result<(), GitKvError> { + let staging_file = self.get_staging_file_path()?; + + // Serialize the staging area + let serialized = + bincode::serialize(&self.staging_area).map_err(GitKvError::SerializationError)?; + + std::fs::write(staging_file, serialized).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to write staging area: {e}")) + })?; + + Ok(()) + } + + /// Stage and commit prolly metadata files to git + fn commit_prolly_metadata(&self, additional_message: &str) -> Result<(), GitKvError> { + // Get relative paths to the prolly files from git root + let git_root = Self::find_git_root(self.git_repo.path().parent().unwrap()).unwrap(); + let current_dir = std::env::current_dir().map_err(|e| { + GitKvError::GitObjectError(format!("Failed to get current directory: {e}")) + })?; + + let config_file = "prolly_config_tree_config"; + let mapping_file = "prolly_hash_mappings"; + + // Check if files exist before trying to stage them + let config_path = current_dir.join(config_file); + let mapping_path = current_dir.join(mapping_file); + + let mut files_to_stage = Vec::new(); + + if config_path.exists() { + // Get relative path from git root + if let Ok(relative_path) = config_path.strip_prefix(&git_root) { + files_to_stage.push(relative_path.to_string_lossy().to_string()); + } + } + + if mapping_path.exists() { + // Get relative path from git root + if let Ok(relative_path) = mapping_path.strip_prefix(&git_root) { + files_to_stage.push(relative_path.to_string_lossy().to_string()); + } + } + + if files_to_stage.is_empty() { + return Ok(()); // Nothing to commit + } + + // Stage the files using git add + for file in &files_to_stage { + let add_cmd = std::process::Command::new("git") + .args(["add", file]) + .current_dir(&git_root) + .output() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to run git add: {e}")))?; + + if !add_cmd.status.success() { + let 
stderr = String::from_utf8_lossy(&add_cmd.stderr); + return Err(GitKvError::GitObjectError(format!( + "git add failed: {stderr}" + ))); + } + } + + // Commit the staged files + let commit_message = format!("Update prolly metadata{additional_message}"); + let commit_cmd = std::process::Command::new("git") + .args(["commit", "-m", &commit_message]) + .current_dir(&git_root) + .output() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to run git commit: {e}")))?; + + if !commit_cmd.status.success() { + let stderr = String::from_utf8_lossy(&commit_cmd.stderr); + // It's okay if there's nothing to commit + if !stderr.is_empty() && !stderr.contains("nothing to commit") { + return Err(GitKvError::GitObjectError(format!( + "git commit failed: {stderr}" + ))); + } + } + + Ok(()) + } + + /// Load the staging area from a file + fn load_staging_area(&mut self) -> Result<(), GitKvError> { + let staging_file = self.get_staging_file_path()?; + + if staging_file.exists() { + let data = std::fs::read(staging_file).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to read staging area: {e}")) + })?; + + self.staging_area = + bincode::deserialize(&data).map_err(GitKvError::SerializationError)?; + } + Ok(()) } + + /// Get the dataset-specific staging file path + fn get_staging_file_path(&self) -> Result { + // Get the current directory relative to git root + let current_dir = std::env::current_dir().map_err(|e| { + GitKvError::GitObjectError(format!("Failed to get current directory: {e}")) + })?; + + let git_root = Self::find_git_root(&current_dir) + .ok_or_else(|| GitKvError::GitObjectError("Not in a git repository".to_string()))?; + + // Create a dataset-specific identifier from the relative path + let relative_path = current_dir + .strip_prefix(&git_root) + .map_err(|e| GitKvError::GitObjectError(format!("Failed to get relative path: {e}")))?; + + // Use the relative path to create a unique staging file name + let path_str = relative_path.to_string_lossy().replace(['/', 
'\\'], "_"); + let staging_filename = if path_str.is_empty() { + "PROLLY_STAGING_root".to_string() + } else { + format!("PROLLY_STAGING_{path_str}") + }; + + Ok(self.git_repo.path().join(staging_filename)) + } } #[cfg(test)] @@ -406,14 +670,24 @@ mod tests { #[test] fn test_versioned_store_init() { let temp_dir = TempDir::new().unwrap(); - let store = VersionedKvStore::<32>::init(temp_dir.path()); + // Initialize git repository (regular, not bare) + gix::init(temp_dir.path()).unwrap(); + // Create subdirectory for dataset + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let store = VersionedKvStore::<32>::init(&dataset_dir); assert!(store.is_ok()); } #[test] fn test_basic_kv_operations() { let temp_dir = TempDir::new().unwrap(); - let mut store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + // Initialize git repository (regular, not bare) + gix::init(temp_dir.path()).unwrap(); + // Create subdirectory for dataset + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let mut store = VersionedKvStore::<32>::init(&dataset_dir).unwrap(); // Test insert and get store.insert(b"key1".to_vec(), b"value1".to_vec()).unwrap(); @@ -433,7 +707,12 @@ mod tests { #[test] fn test_commit_workflow() { let temp_dir = TempDir::new().unwrap(); - let mut store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + // Initialize git repository (regular, not bare) + gix::init(temp_dir.path()).unwrap(); + // Create subdirectory for dataset + let dataset_dir = temp_dir.path().join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let mut store = VersionedKvStore::<32>::init(&dataset_dir).unwrap(); // Stage changes store.insert(b"key1".to_vec(), b"value1".to_vec()).unwrap(); diff --git a/src/storage.rs b/src/storage.rs index 31fddba..5a6db82 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -19,6 +19,7 @@ use std::fmt::{Display, Formatter, LowerHex}; 
use std::fs::{self, File}; use std::io::{Read, Write}; use std::path::PathBuf; +use std::sync::RwLock; /// A trait for storage of nodes in the ProllyTree. /// @@ -66,10 +67,18 @@ pub trait NodeStorage: Send + Sync { /// # Type Parameters /// /// - `N`: The size of the value digest. -#[derive(Clone)] pub struct InMemoryNodeStorage { map: HashMap, ProllyNode>, - configs: HashMap>, + configs: RwLock>>, +} + +impl Clone for InMemoryNodeStorage { + fn clone(&self) -> Self { + InMemoryNodeStorage { + map: self.map.clone(), + configs: RwLock::new(self.configs.read().unwrap().clone()), + } + } } impl Default for InMemoryNodeStorage { @@ -82,7 +91,7 @@ impl InMemoryNodeStorage { pub fn new() -> Self { InMemoryNodeStorage { map: HashMap::new(), - configs: HashMap::new(), + configs: RwLock::new(HashMap::new()), } } } @@ -107,12 +116,14 @@ impl NodeStorage for InMemoryNodeStorage { } fn save_config(&self, key: &str, config: &[u8]) { - let mut configs = self.configs.clone(); - configs.insert(key.to_string(), config.to_vec()); + self.configs + .write() + .unwrap() + .insert(key.to_string(), config.to_vec()); } fn get_config(&self, key: &str) -> Option> { - self.configs.get(key).cloned() + self.configs.read().unwrap().get(key).cloned() } } diff --git a/src/tree.rs b/src/tree.rs index 51c3ccd..004525d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -220,9 +220,9 @@ impl Default for TreeStats { } pub struct ProllyTree> { - root: ProllyNode, - storage: S, - config: TreeConfig, + pub root: ProllyNode, + pub storage: S, + pub config: TreeConfig, } impl> Tree for ProllyTree { @@ -525,14 +525,6 @@ impl> Tree for ProllyTree { } } -impl, const N: usize> ProllyTree { - fn persist_root(&mut self) { - // Save the updated child node back to the storage - self.storage - .insert_node(self.root.get_hash(), self.root.clone()); - } -} - impl> ProllyTree { /// Recursively computes the differences between two Prolly Nodes. 
/// @@ -589,6 +581,57 @@ impl> ProllyTree { diffs.push(DiffResult::Added(new_key.clone(), new_value.clone())); } } + + /// Persist the root node to storage and save configuration + pub fn persist_root(&mut self) { + // Store the root node in the storage + let root_hash = self.root.get_hash(); + if self + .storage + .insert_node(root_hash.clone(), self.root.clone()) + .is_some() + { + // Update the config with the new root hash + self.config.root_hash = Some(root_hash); + + // Save the configuration + let _ = self.save_config(); + } + } + + /// Load a ProllyTree from an existing root hash in storage + pub fn load_from_storage(storage: S, config: TreeConfig) -> Option { + if let Some(ref root_hash) = config.root_hash { + if let Some(root_node) = storage.get_node_by_hash(root_hash) { + return Some(ProllyTree { + root: root_node, + storage, + config, + }); + } + } + None + } + + /// Collect all keys from the tree + pub fn collect_keys(&self) -> Vec> { + let mut keys = Vec::new(); + self.collect_keys_recursive(&self.root, &mut keys); + keys + } + + /// Recursively collect keys from a node and its children + fn collect_keys_recursive(&self, node: &ProllyNode, keys: &mut Vec>) { + // Add all keys from this node + for key in &node.keys { + keys.push(key.clone()); + } + + // Recursively collect keys from child nodes + for child_node in node.children(&self.storage) { + self.collect_keys_recursive(&child_node, keys); + } + } } #[cfg(test)] @@ -735,6 +778,29 @@ mod tests { assert!(tree.find(b"key3").is_none()); } + #[test] + fn test_persist_and_load() { + let storage = InMemoryNodeStorage::<32>::default(); + let config = TreeConfig::default(); + + // Create tree and add data + let mut tree = ProllyTree::new(storage.clone(), config.clone()); + tree.insert(b"key1".to_vec(), b"value1".to_vec()); + tree.insert(b"key2".to_vec(), b"value2".to_vec()); + + // Persist the tree + tree.persist_root(); + + // Load the tree from storage + let loaded_tree = 
ProllyTree::load_from_storage(tree.storage, tree.config) + .expect("Should be able to load tree from storage"); + + // Verify data is preserved + assert!(loaded_tree.find(b"key1").is_some()); + assert!(loaded_tree.find(b"key2").is_some()); + assert!(loaded_tree.find(b"key3").is_none()); + } + #[test] fn test_insert_batch_and_find() { let storage = InMemoryNodeStorage::<32>::default();