diff --git a/README.md b/README.md index cd0327b..bc31b9c 100644 --- a/README.md +++ b/README.md @@ -192,12 +192,11 @@ The following features are for Prolly tree library for Version 0.2.0: The following features are for Prolly tree library for Version 0.2.1: - [X] tree diffing and merging examples -- [ ] show history of changes of the Prolly tree (git logs style) using `gitoxide` crate -- [ ] build database index using Prolly Tree +- [X] show history of changes of the Prolly tree (git logs style) using `gitoxide` crate +- [ ] support gluesql as a kv store The following features are for Prolly tree library for Version 0.2.2: - [ ] version-controlled databases - - [ ] prolly tree backend for git databases using `gitoxide` crate - [ ] support for IPFS (InterPlanetary File System) for distributed storage - [ ] advanced probabilistic splitting algorithms - [ ] decentralized databases diff --git a/src/bin/git-prolly.rs b/src/bin/git-prolly.rs index af51cf6..9b23fc8 100644 --- a/src/bin/git-prolly.rs +++ b/src/bin/git-prolly.rs @@ -86,31 +86,12 @@ enum Commands { /// Show KV state at specific commit Show { - #[arg(help = "Commit to show")] - commit: String, + #[arg(help = "Commit to show (defaults to HEAD)")] + commit: Option, #[arg(long, help = "Show only keys")] keys_only: bool, }, - /// KV-aware log with summaries - Log { - #[arg(long, help = "Show KV change summary")] - kv_summary: bool, - #[arg(long, help = "Filter by key pattern")] - keys: Option, - #[arg(long, help = "Limit number of commits")] - limit: Option, - }, - - /// List all branches - Branch, - - /// Switch to a branch or commit - Checkout { - #[arg(help = "Branch or commit to checkout")] - target: String, - }, - /// Merge another branch Merge { #[arg(help = "Branch to merge")] @@ -119,13 +100,7 @@ enum Commands { strategy: Option, }, - /// Revert a commit - Revert { - #[arg(help = "Commit to revert")] - commit: String, - }, - - /// Show repository statistics + /// Show KV store statistics Stats { 
#[arg(help = "Commit to analyze (defaults to HEAD)")] commit: Option, @@ -168,25 +143,9 @@ fn main() -> Result<(), Box> { Commands::Show { commit, keys_only } => { handle_show(commit, keys_only)?; } - Commands::Log { - kv_summary, - keys, - limit, - } => { - handle_log(kv_summary, keys, limit)?; - } - Commands::Branch => { - handle_branch()?; - } - Commands::Checkout { target } => { - handle_checkout(target)?; - } Commands::Merge { branch, strategy } => { handle_merge(branch, strategy)?; } - Commands::Revert { commit } => { - handle_revert(commit)?; - } Commands::Stats { commit } => { handle_stats(commit)?; } @@ -196,7 +155,8 @@ fn main() -> Result<(), Box> { } fn handle_init(path: Option) -> Result<(), Box> { - let target_path = path.unwrap_or_else(|| env::current_dir().unwrap()); + let target_path = + path.unwrap_or_else(|| env::current_dir().unwrap_or_else(|_| PathBuf::from("."))); println!("Initializing ProllyTree KV store in {target_path:?}..."); @@ -213,13 +173,9 @@ fn handle_set(key: String, value: String) -> Result<(), Box::open(&current_dir)?; - // Clone the strings before moving them into insert - let key_display = key.clone(); - let value_display = value.clone(); - - store.insert(key.into_bytes(), value.into_bytes())?; + store.insert(key.as_bytes().to_vec(), value.as_bytes().to_vec())?; - println!("✓ Staged: {key_display} = \"{value_display}\""); + println!("✓ Staged: {key} = \"{value}\""); println!(" (Use 'git prolly commit' to save changes)"); Ok(()) @@ -467,15 +423,16 @@ fn handle_diff( Ok(()) } -fn handle_show(commit: String, keys_only: bool) -> Result<(), Box> { +fn handle_show(commit: Option, keys_only: bool) -> Result<(), Box> { let current_dir = env::current_dir()?; let store = VersionedKvStore::<32>::open(&current_dir)?; let ops = GitOperations::new(store); - let details = ops.show(&commit)?; + let commit_ref = commit.unwrap_or_else(|| "HEAD".to_string()); + let details = ops.show(&commit_ref)?; if keys_only { - println!("Keys at commit {commit}:"); + 
println!("Keys at commit {commit_ref}:"); for change in details.changes { let key_str = String::from_utf8_lossy(&change.key); println!(" {key_str}"); @@ -517,117 +474,6 @@ fn handle_show(commit: String, keys_only: bool) -> Result<(), Box, - limit: Option, -) -> Result<(), Box> { - let current_dir = env::current_dir()?; - let store = VersionedKvStore::<32>::open(&current_dir)?; - - let mut history = store.log()?; - - if let Some(limit) = limit { - history.truncate(limit); - } - - // Check current branch for the first commit (HEAD) - let current_branch = store.current_branch(); - let head_commit_id = store.git_repo().head_id().ok(); - - for (index, commit) in history.iter().enumerate() { - let date = chrono::DateTime::from_timestamp(commit.timestamp, 0).unwrap_or_default(); - - // Format like git log: "Wed Jul 16 22:27:36 2025 -0700" - let formatted_date = date.format("%a %b %d %H:%M:%S %Y %z"); - - // Add branch reference for HEAD commit - let branch_ref = if index == 0 - && head_commit_id.as_ref().map(|id| id.as_ref()) == Some(commit.id.as_ref()) - { - format!(" (HEAD -> {current_branch})") - } else { - String::new() - }; - - if kv_summary { - // Get changes for this commit - create a new store instance - let ops_store = VersionedKvStore::<32>::open(&current_dir)?; - let ops = GitOperations::new(ops_store); - let changes = match ops.show(&commit.id.to_string()) { - Ok(details) => details.changes, - Err(_) => vec![], - }; - - let added = changes - .iter() - .filter(|c| matches!(c.operation, DiffOperation::Added(_))) - .count(); - let removed = changes - .iter() - .filter(|c| matches!(c.operation, DiffOperation::Removed(_))) - .count(); - let modified = changes - .iter() - .filter(|c| matches!(c.operation, DiffOperation::Modified { .. 
})) - .count(); - - println!("commit {}{}", commit.id, branch_ref); - println!("Author: {}", commit.author); - println!("Date: {formatted_date}"); - println!(); - println!( - " {} (+{} ~{} -{})", - commit.message, added, modified, removed - ); - println!(); - } else { - println!("commit {}{}", commit.id, branch_ref); - println!("Author: {}", commit.author); - println!("Date: {formatted_date}"); - println!(); - println!(" {}", commit.message); - println!(); - } - } - - Ok(()) -} - -fn handle_branch() -> Result<(), Box> { - let current_dir = env::current_dir()?; - let store = VersionedKvStore::<32>::open(&current_dir)?; - - let branches = store.list_branches()?; - let current_branch = store.current_branch(); - - if branches.is_empty() { - println!("No branches found"); - return Ok(()); - } - - for branch in branches { - if branch == current_branch { - println!("* {branch}"); - } else { - println!(" {branch}"); - } - } - - Ok(()) -} - -fn handle_checkout(target: String) -> Result<(), Box> { - let current_dir = env::current_dir()?; - let mut store = VersionedKvStore::<32>::open(&current_dir)?; - - store.checkout(&target)?; - - println!("✓ Switched to: {target}"); - - Ok(()) -} - fn handle_merge( branch: String, _strategy: Option, @@ -648,29 +494,24 @@ fn handle_merge( println!(" Merge commit: {commit_id}"); } MergeResult::Conflict(conflicts) => { - println!("⚠ Merge conflicts detected:"); - for conflict in conflicts { - println!(" {conflict}"); + // Check if this is our "manual merge needed" indicator + if conflicts.len() == 1 && conflicts[0].key == b"" { + println!("⚠ Cannot automatically merge branches"); + println!(" The branches have diverged and require manual merging"); + println!(" Use 'git merge {branch}' to perform a manual merge"); + } else { + println!("⚠ Merge conflicts detected:"); + for conflict in conflicts { + println!(" {conflict}"); + } + println!("\nResolve conflicts and run 'git prolly commit' to complete the merge"); } - println!("\nResolve conflicts and run 
'git prolly commit' to complete the merge"); } } Ok(()) } -fn handle_revert(commit: String) -> Result<(), Box> { - let current_dir = env::current_dir()?; - let store = VersionedKvStore::<32>::open(&current_dir)?; - let mut ops = GitOperations::new(store); - - ops.revert(&commit)?; - - println!("✓ Reverted commit: {commit}"); - - Ok(()) -} - fn handle_stats(commit: Option) -> Result<(), Box> { let current_dir = env::current_dir()?; let store = VersionedKvStore::<32>::open(&current_dir)?; diff --git a/src/git/operations.rs b/src/git/operations.rs index 0936514..6395d89 100644 --- a/src/git/operations.rs +++ b/src/git/operations.rs @@ -27,40 +27,97 @@ impl GitOperations { GitOperations { store } } - /// Perform a three-way merge between two branches + /// Perform a merge between two branches, focusing on fast-forward merges pub fn merge(&mut self, other_branch: &str) -> Result { // Get the current branch state - let current_branch = self.store.current_branch().to_string(); + let current_branch = self.store.current_branch(); - // Find the common ancestor (merge base) - let merge_base = self.find_merge_base(&current_branch, other_branch)?; + // Get the commit IDs for both branches + let current_commit = self.get_branch_commit(current_branch)?; + let other_commit = self.get_branch_commit(other_branch)?; - // Get the states at each commit - let base_state = self.get_kv_state_at_commit(&merge_base)?; - let our_state = self.get_current_kv_state()?; - let their_state = self.get_kv_state_at_branch(other_branch)?; + // Check if they're the same (nothing to merge) + if current_commit == other_commit { + return Ok(MergeResult::FastForward(current_commit)); + } + + // Check if we can do a fast-forward merge + if self.is_fast_forward_possible(&current_commit, &other_commit)? 
{ + // Fast-forward merge: just update HEAD to the other branch + self.store.checkout(other_branch)?; + return Ok(MergeResult::FastForward(other_commit)); + } + + // For now, we don't support three-way merges + // Return a conflict indicating manual merge is needed + let conflicts = vec![crate::git::types::KvConflict { + key: b"".to_vec(), + base_value: None, + our_value: Some(b"Cannot automatically merge - manual merge required".to_vec()), + their_value: Some(b"Use 'git merge' or resolve conflicts manually".to_vec()), + }]; + + Ok(MergeResult::Conflict(conflicts)) + } + + /// Check if a fast-forward merge is possible + fn is_fast_forward_possible( + &self, + current_commit: &gix::ObjectId, + other_commit: &gix::ObjectId, + ) -> Result { + // Fast-forward is possible if the other commit is a descendant of the current commit + // This means the current commit should be an ancestor of the other commit + self.is_ancestor(current_commit, other_commit) + } - // Perform three-way merge - let merge_result = self.perform_three_way_merge(&base_state, &our_state, &their_state)?; + /// Check if commit A is an ancestor of commit B + fn is_ancestor( + &self, + ancestor: &gix::ObjectId, + descendant: &gix::ObjectId, + ) -> Result { + // If they're the same, ancestor relationship is true + if ancestor == descendant { + return Ok(true); + } + + // Walk through the parents of the descendant commit + let mut visited = std::collections::HashSet::new(); + let mut queue = std::collections::VecDeque::new(); + queue.push_back(*descendant); - match merge_result { - MergeResult::Conflict(conflicts) => { - // Return conflicts for user resolution - Ok(MergeResult::Conflict(conflicts)) + while let Some(current_commit) = queue.pop_front() { + if visited.contains(&current_commit) { + continue; } - MergeResult::FastForward(commit_id) => { - // Update HEAD to the target commit - self.store.checkout(&commit_id.to_string())?; - Ok(MergeResult::FastForward(commit_id)) + visited.insert(current_commit); + + 
// If we found the ancestor, return true + if current_commit == *ancestor { + return Ok(true); } - MergeResult::ThreeWay(_commit_id) => { - // The merge was successful, commit the result - let final_commit = self - .store - .commit(&format!("Merge branch '{other_branch}'"))?; - Ok(MergeResult::ThreeWay(final_commit)) + + // Add parents to queue + let mut buffer = Vec::new(); + if let Ok(commit_obj) = self + .store + .git_repo() + .objects + .find(&current_commit, &mut buffer) + { + if let Ok(gix::objs::ObjectRef::Commit(commit)) = commit_obj.decode() { + for parent_id in commit.parents() { + if !visited.contains(&parent_id) { + queue.push_back(parent_id); + } + } + } } } + + // If we didn't find the ancestor, return false + Ok(false) } /// Generate a diff between two branches or commits @@ -195,96 +252,6 @@ impl GitOperations { Ok(()) } - /// Find the merge base between two branches - fn find_merge_base(&self, branch1: &str, branch2: &str) -> Result { - let commit1 = self.get_branch_commit(branch1)?; - let commit2 = self.get_branch_commit(branch2)?; - - // If the commits are the same, return it as the merge base - if commit1 == commit2 { - return Ok(commit1); - } - - // Get all ancestors of commit1 - let mut ancestors1 = std::collections::HashSet::new(); - self.collect_ancestors(&commit1, &mut ancestors1)?; - - // Walk through ancestors of commit2 to find the first common ancestor - let mut visited = std::collections::HashSet::new(); - let mut queue = std::collections::VecDeque::new(); - queue.push_back(commit2); - - while let Some(current_commit) = queue.pop_front() { - if visited.contains(&current_commit) { - continue; - } - visited.insert(current_commit); - - // If this commit is an ancestor of commit1, it's our merge base - if ancestors1.contains(&current_commit) { - return Ok(current_commit); - } - - // Add parents to queue - let mut buffer = Vec::new(); - if let Ok(commit_obj) = self - .store - .git_repo() - .objects - .find(&current_commit, &mut buffer) - { - if let 
Ok(gix::objs::ObjectRef::Commit(commit)) = commit_obj.decode() { - for parent_id in commit.parents() { - if !visited.contains(&parent_id) { - queue.push_back(parent_id); - } - } - } - } - } - - // If no common ancestor found, return an error - Err(GitKvError::GitObjectError(format!( - "No common ancestor found between {branch1} and {branch2}" - ))) - } - - /// Collect all ancestors of a commit - fn collect_ancestors( - &self, - start_commit: &gix::ObjectId, - ancestors: &mut std::collections::HashSet, - ) -> Result<(), GitKvError> { - let mut queue = std::collections::VecDeque::new(); - queue.push_back(*start_commit); - - while let Some(current_commit) = queue.pop_front() { - if ancestors.contains(&current_commit) { - continue; - } - ancestors.insert(current_commit); - - // Add parents to queue - let mut buffer = Vec::new(); - if let Ok(commit_obj) = self - .store - .git_repo() - .objects - .find(&current_commit, &mut buffer) - { - if let Ok(gix::objs::ObjectRef::Commit(commit)) = commit_obj.decode() { - for parent_id in commit.parents() { - if !ancestors.contains(&parent_id) { - queue.push_back(parent_id); - } - } - } - } - } - - Ok(()) - } - /// Get the commit ID for a branch fn get_branch_commit(&self, branch: &str) -> Result { // Try to resolve the branch reference @@ -334,14 +301,107 @@ impl GitOperations { return self.get_current_kv_state(); } - // For now, return empty state for non-HEAD commits - // This is a limitation - in a full implementation, we would need to: - // 1. Parse the commit object to get the tree - // 2. Reconstruct the ProllyTree from the Git objects - // 3. 
Extract key-value pairs from the reconstructed tree - // - // For the purpose of fixing the immediate issue, we'll focus on HEAD commits - Ok(HashMap::new()) + // Reconstruct the ProllyTree state from the specific commit + self.reconstruct_kv_state_from_commit(commit_id) + } + + /// Reconstruct KV state from a specific commit by temporarily switching to it + fn reconstruct_kv_state_from_commit( + &self, + commit_id: &gix::ObjectId, + ) -> Result, Vec>, GitKvError> { + // Create a temporary store to reconstruct the state + let current_dir = std::env::current_dir() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to get current dir: {e}")))?; + + // Create a temporary clone of the versioned store + let mut temp_store = VersionedKvStore::::open(&current_dir)?; + + // Save current state + let original_branch = temp_store.current_branch().to_string(); + + // Switch to the target commit temporarily + let result = self.checkout_commit_temporarily(&mut temp_store, commit_id); + + // Restore original state + if let Err(e) = temp_store.checkout(&original_branch) { + // Log error but continue with the result we got + eprintln!("Warning: Failed to restore original branch {original_branch}: {e}"); + } + + result + } + + /// Temporarily checkout a commit and extract its KV state + fn checkout_commit_temporarily( + &self, + store: &mut VersionedKvStore, + commit_id: &gix::ObjectId, + ) -> Result, Vec>, GitKvError> { + // Update the store to point to the specific commit + // This is a simplified approach - we'll try to reconstruct from the commit + + // For now, we'll create a temporary directory and checkout the commit there + // This is a workaround until we implement full historical state reconstruction + let temp_dir = std::env::temp_dir().join(format!("prolly_temp_{}", commit_id.to_hex())); + + // Create temporary directory + std::fs::create_dir_all(&temp_dir) + .map_err(|e| GitKvError::GitObjectError(format!("Failed to create temp dir: {e}")))?; + + // Use git to checkout 
the specific commit in the temp directory + let output = std::process::Command::new("git") + .args([ + "clone", + "--quiet", + store.git_repo().path().to_str().unwrap_or("."), + temp_dir.to_str().unwrap_or("."), + ]) + .output() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to clone repo: {e}")))?; + + if !output.status.success() { + return Err(GitKvError::GitObjectError(format!( + "Git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ))); + } + + // Checkout the specific commit + let output = std::process::Command::new("git") + .args(["checkout", "--quiet", &commit_id.to_hex().to_string()]) + .current_dir(&temp_dir) + .output() + .map_err(|e| GitKvError::GitObjectError(format!("Failed to checkout commit: {e}")))?; + + if !output.status.success() { + // Clean up temp directory + let _ = std::fs::remove_dir_all(&temp_dir); + return Err(GitKvError::GitObjectError(format!( + "Git checkout failed: {}", + String::from_utf8_lossy(&output.stderr) + ))); + } + + // Try to open the store at the temp location + let dataset_dir = temp_dir.join("dataset"); + let result = if dataset_dir.exists() { + match VersionedKvStore::::open(&dataset_dir) { + Ok(temp_store) => self.get_current_kv_state_from_store(&temp_store), + Err(_) => { + // If we can't open the store, return empty state + Ok(HashMap::new()) + } + } + } else { + // No dataset directory, return empty state + Ok(HashMap::new()) + }; + + // Clean up temp directory + let _ = std::fs::remove_dir_all(&temp_dir); + + result } /// Get KV state at a specific branch @@ -378,54 +438,6 @@ impl GitOperations { Ok(state) } - /// Perform a three-way merge - fn perform_three_way_merge( - &self, - base: &HashMap, Vec>, - ours: &HashMap, Vec>, - theirs: &HashMap, Vec>, - ) -> Result { - let mut conflicts = Vec::new(); - - // Collect all keys - let mut all_keys = std::collections::HashSet::new(); - for key in base.keys() { - all_keys.insert(key.clone()); - } - for key in ours.keys() { - 
all_keys.insert(key.clone()); - } - for key in theirs.keys() { - all_keys.insert(key.clone()); - } - - // Check for conflicts - for key in all_keys { - let base_value = base.get(&key); - let our_value = ours.get(&key); - let their_value = theirs.get(&key); - - // Detect conflicts - if base_value != our_value && base_value != their_value && our_value != their_value { - conflicts.push(KvConflict { - key: key.clone(), - base_value: base_value.cloned(), - our_value: our_value.cloned(), - their_value: their_value.cloned(), - }); - } - } - - if conflicts.is_empty() { - // No conflicts, create merge commit - Ok(MergeResult::ThreeWay(gix::ObjectId::null( - gix::hash::Kind::Sha1, - ))) - } else { - Ok(MergeResult::Conflict(conflicts)) - } - } - /// Parse a commit ID from a string fn parse_commit_id(&self, commit: &str) -> Result { // Try to resolve using git's rev-parse functionality diff --git a/src/git/versioned_store.rs b/src/git/versioned_store.rs index 5de036f..5ef32ce 100644 --- a/src/git/versioned_store.rs +++ b/src/git/versioned_store.rs @@ -295,21 +295,52 @@ impl VersionedKvStore { /// Create a new branch pub fn branch(&mut self, name: &str) -> Result<(), GitKvError> { - // Get the current HEAD commit - simplified approach + // Get the current HEAD commit let head = self .git_repo .head() .map_err(|e| GitKvError::GitObjectError(format!("Failed to get HEAD: {e}")))?; - let _head_commit_id = head.id().ok_or_else(|| { + let head_commit_id = head.id().ok_or_else(|| { GitKvError::GitObjectError("HEAD does not point to a commit".to_string()) })?; - let _branch_ref = format!("refs/heads/{name}"); + // Create the branch reference to point to the current HEAD + let refs_dir = self.git_repo.path().join("refs").join("heads"); + std::fs::create_dir_all(&refs_dir).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to create refs directory: {e}")) + })?; + + let branch_file = refs_dir.join(name); + std::fs::write(&branch_file, 
head_commit_id.to_hex().to_string()).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to write branch reference: {e}")) + })?; + + Ok(()) + } + + /// Create a new branch from the current branch and switch to it + pub fn create_branch(&mut self, name: &str) -> Result<(), GitKvError> { + // First create the branch + self.branch(name)?; + + // Then switch to it + // Clear staging area + self.staging_area.clear(); + self.save_staging_area()?; + + // Update our internal tracking to the new branch + self.current_branch = name.to_string(); + + // Update HEAD to point to the new branch + let head_file = self.git_repo.path().join("HEAD"); + let head_content = format!("ref: refs/heads/{name}"); + std::fs::write(&head_file, head_content) + .map_err(|e| GitKvError::GitObjectError(format!("Failed to update HEAD: {e}")))?; + + // Reload tree state from the current HEAD (same as current branch) + self.reload_tree_from_head()?; - // Note: This is a simplified implementation - // A full implementation would use gix transaction API to properly create branch references - // For now, we return success as branch operations are handled at a higher level Ok(()) } @@ -330,8 +361,14 @@ impl VersionedKvStore { match self.git_repo.refs.find(&target_ref) { Ok(_reference) => { // Update our internal tracking - // Note: A full implementation would use gix transaction API to update HEAD self.current_branch = branch_or_commit.to_string(); + + // Update HEAD to point to the new branch + let head_file = self.git_repo.path().join("HEAD"); + let head_content = format!("ref: refs/heads/{branch_or_commit}"); + std::fs::write(&head_file, head_content).map_err(|e| { + GitKvError::GitObjectError(format!("Failed to update HEAD: {e}")) + })?; } Err(_) => { return Err(GitKvError::BranchNotFound(branch_or_commit.to_string()));