diff --git a/python/prollytree/prollytree.pyi b/python/prollytree/prollytree.pyi index e8ebeef..421d6b6 100644 --- a/python/prollytree/prollytree.pyi +++ b/python/prollytree/prollytree.pyi @@ -318,6 +318,42 @@ class ConflictResolution: TakeSource: "ConflictResolution" TakeDestination: "ConflictResolution" +class DiffOperation: + """Represents a difference operation (Added, Removed, or Modified)""" + + @property + def operation_type(self) -> str: + """The type of operation: 'Added', 'Removed', or 'Modified'""" + ... + + @property + def value(self) -> Optional[bytes]: + """For Added/Removed operations, the value involved""" + ... + + @property + def old_value(self) -> Optional[bytes]: + """For Modified operations, the old value""" + ... + + @property + def new_value(self) -> Optional[bytes]: + """For Modified operations, the new value""" + ... + +class KvDiff: + """Represents a key-value difference between two references""" + + @property + def key(self) -> bytes: + """The key that changed""" + ... + + @property + def operation(self) -> DiffOperation: + """The operation that occurred on this key""" + ... + class VersionedKvStore: """A versioned key-value store backed by Git and ProllyTree""" @@ -483,6 +519,7 @@ class VersionedKvStore: """ ... + def get_commit_history(self) -> List[Dict[str, Union[str, int]]]: """ Get the commit history for the repository. @@ -595,3 +632,39 @@ class VersionedKvStore: pairs = store.get_keys_at_ref("HEAD~1") """ ... + + def diff(self, from_ref: str, to_ref: str) -> List[KvDiff]: + """ + Compare two commits or branches and return all keys that are added, updated or deleted. + + Args: + from_ref: Reference (branch or commit) to compare from + to_ref: Reference (branch or commit) to compare to + + Returns: + List of KvDiff objects representing the differences between the two references + + Example: + # Compare two commits + diffs = store.diff("abc123", "def456") + + # Compare two branches + diffs = store.diff("main", "feature-branch") + + # Check what changed from last commit + diffs = store.diff("HEAD~1", "HEAD") + """ + ... + + def current_commit(self) -> str: + """ + Get the current commit's object ID. + + Returns: + The hexadecimal string representation of the current commit ID + + Example: + commit_id = store.current_commit() + print(f"Current commit: {commit_id}") + """ + ... diff --git a/python/tests/test_diff.py b/python/tests/test_diff.py new file mode 100644 index 0000000..b0026fa --- /dev/null +++ b/python/tests/test_diff.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the diff and current_commit functionality in VersionedKvStore.""" + +import tempfile +import shutil +import subprocess +import os +import pytest +from pathlib import Path + +import prollytree + + +class TestDiffFunctionality: + """Test diff and current_commit functions.""" + + def setup_method(self): + """Set up test fixtures.""" + self.temp_dir = tempfile.mkdtemp() + + # Initialize git repository in the temp directory + subprocess.run(["git", "init"], cwd=self.temp_dir, check=True, capture_output=True) + subprocess.run(["git", "config", "user.name", "Test User"], cwd=self.temp_dir, check=True) + subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=self.temp_dir, check=True) + + # Create subdirectory for the store (not in git root) + self.store_path = Path(self.temp_dir) / "data" + self.store_path.mkdir(parents=True, exist_ok=True) + + # Change working directory to the store path for git operations + self.original_cwd = os.getcwd() + os.chdir(str(self.store_path)) + + def teardown_method(self): + """Clean up test fixtures.""" + # Restore original working directory + os.chdir(self.original_cwd) + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_diff_between_commits(self): + """Test diff between two commits.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create first commit + store.insert(b"key1", b"value1") + store.insert(b"key2", b"value2") + commit1 = store.commit("Initial commit") + + # Create second commit with changes + store.insert(b"key3", b"value3") # Added + store.update(b"key1", b"value1_modified") # Modified + store.delete(b"key2") # Removed + commit2 = store.commit("Second commit") + + # Get diff between commits + diffs = store.diff(commit1, commit2) + + # Verify diff results + diff_map = {diff.key: diff.operation for diff in diffs} + + # Check that we have all expected changes + assert len(diffs) == 3 + assert b"key1" in diff_map + assert b"key2" in diff_map + assert b"key3" in diff_map + + # Verify operation types + key1_op = diff_map[b"key1"] + assert key1_op.operation_type == "Modified" + assert key1_op.old_value == b"value1" + assert key1_op.new_value == b"value1_modified" + + key2_op = diff_map[b"key2"] + assert key2_op.operation_type == "Removed" + assert key2_op.value == b"value2" + + key3_op = diff_map[b"key3"] + assert key3_op.operation_type == "Added" + assert key3_op.value == b"value3" + + def test_diff_between_branches(self): + """Test diff between two branches.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create initial data on main branch + store.insert(b"shared", b"initial") + store.insert(b"main_only", b"main_value") + store.commit("Initial commit on main") + + # Create feature branch + store.create_branch("feature") + + # Make changes on feature branch + store.update(b"shared", b"feature_value") + store.insert(b"feature_only", b"feature_data") + store.delete(b"main_only") + store.commit("Changes on feature branch") + + # Get diff between branches + diffs = store.diff("main", "feature") + + # Verify diff results + assert len(diffs) == 3 + + diff_map = {diff.key: diff.operation for diff in diffs} + + # Check shared key was modified + shared_op = diff_map[b"shared"] + assert shared_op.operation_type == "Modified" + assert shared_op.old_value == b"initial" + assert shared_op.new_value == b"feature_value" + + # Check main_only was removed + main_only_op = diff_map[b"main_only"] + assert main_only_op.operation_type == "Removed" + + # Check feature_only was added + feature_only_op = diff_map[b"feature_only"] + assert feature_only_op.operation_type == "Added" + + def test_current_commit(self): + """Test getting current commit ID.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create first commit + store.insert(b"key1", b"value1") + commit1 = store.commit("First commit") + + # Get current commit + current = store.current_commit() + assert current == commit1 + + # Create second commit + store.insert(b"key2", b"value2") + commit2 = store.commit("Second commit") + + # Current commit should be updated + current = store.current_commit() + assert current == commit2 + + # Test with branch operations + store.create_branch("test-branch") + store.insert(b"key3", b"value3") + commit3 = store.commit("Third commit on branch") + + # Current commit should be updated + current = store.current_commit() + assert current == commit3 + + # Checkout back to main branch + store.checkout("main") + current = store.current_commit() + assert current == commit2 + + def test_diff_with_no_changes(self): + """Test diff when there are no changes.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create a commit + store.insert(b"key1", b"value1") + commit1 = store.commit("First commit") + + # Get diff between same commit + diffs = store.diff(commit1, commit1) + + # Should be empty + assert len(diffs) == 0 + + def test_diff_representation(self): + """Test string representation of diff objects.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create commits with changes + store.insert(b"key1", b"value1") + commit1 = store.commit("First") + + store.update(b"key1", b"value2") + commit2 = store.commit("Second") + + # Get diff + diffs = store.diff(commit1, commit2) + + # Check representation + assert len(diffs) == 1 + diff = diffs[0] + + # Test __repr__ methods + repr_str = repr(diff) + assert "key1" in repr_str + assert "Modified" in repr_str + + op_repr = repr(diff.operation) + assert "Modified" in op_repr + assert "old_size" in op_repr + assert "new_size" in op_repr + + def test_get_commits_for_key_functionality(self): + """Test the get_commits_for_key function works correctly.""" + # Initialize store + store = prollytree.VersionedKvStore(str(self.store_path)) + + # Create commits with changes to a specific key + store.insert(b"tracked_key", b"value1") + store.insert(b"other_key", b"other_value") + commit1 = store.commit("First commit") + + store.update(b"tracked_key", b"value2") + commit2 = store.commit("Second commit - tracked_key changed") + + store.insert(b"another_key", b"another_value") + commit3 = store.commit("Third commit - no tracked_key change") + + # Test get_commits_for_key functionality + commits_for_key = store.get_commits_for_key(b"tracked_key") + + # Should return 2 commits that modified tracked_key + assert len(commits_for_key) == 2 + + # Verify the commit IDs match what we expect + commit_ids = [commit['id'] for commit in commits_for_key] + assert commit2 in commit_ids # Most recent change + assert commit1 in commit_ids # First commit with this key + assert commit3 not in [c['id'] for c in commits_for_key] # Third commit didn't touch tracked_key + + # Verify commits are in reverse chronological order (newest first) + assert commits_for_key[0]['id'] == commit2 # Most recent first + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/src/python.rs b/src/python.rs index 5c63d49..3bebc8a 100644 --- a/src/python.rs +++ b/src/python.rs @@ -23,7 +23,7 @@ use crate::{ agent::{AgentMemorySystem, MemoryType}, config::TreeConfig, git::{ - types::StorageBackend, + types::{DiffOperation, StorageBackend}, versioned_store::{HistoricalAccess, HistoricalCommitAccess}, GitVersionedKvStore, }, @@ -815,6 +815,96 @@ enum PyConflictResolution { TakeDestination, } +/// Python wrapper for DiffOperation +#[pyclass(name = "DiffOperation")] +#[derive(Clone)] +struct PyDiffOperation { + operation_type: String, + value: Option>, + old_value: Option>, + new_value: Option>, +} + +#[pymethods] +impl PyDiffOperation { + #[getter] + fn operation_type(&self) -> String { + self.operation_type.clone() + } + + #[getter] + fn value(&self, py: Python) -> PyResult>> { + Ok(self + .value + .as_ref() + .map(|v| PyBytes::new_bound(py, v).into())) + } + + #[getter] + fn old_value(&self, py: Python) -> PyResult>> { + Ok(self + .old_value + .as_ref() + .map(|v| PyBytes::new_bound(py, v).into())) + } + + #[getter] + fn new_value(&self, py: Python) -> PyResult>> { + Ok(self + .new_value + .as_ref() + .map(|v| PyBytes::new_bound(py, v).into())) + } + + fn __repr__(&self) -> String { + match self.operation_type.as_str() { + "Added" => format!( + "DiffOperation.Added(value_size={})", + self.value.as_ref().map_or(0, |v| v.len()) + ), + "Removed" => format!( + "DiffOperation.Removed(value_size={})", + self.value.as_ref().map_or(0, |v| v.len()) + ), + "Modified" => format!( + "DiffOperation.Modified(old_size={}, new_size={})", + self.old_value.as_ref().map_or(0, |v| v.len()), + self.new_value.as_ref().map_or(0, |v| v.len()) + ), + _ => "DiffOperation.Unknown".to_string(), + } + } +} + +/// Python wrapper for KvDiff +#[pyclass(name = "KvDiff")] +#[derive(Clone)] +struct PyKvDiff { + key: Vec, + operation: PyDiffOperation, +} + +#[pymethods] +impl PyKvDiff { + #[getter] + fn key(&self, py: Python) -> PyResult> { + Ok(PyBytes::new_bound(py, &self.key).into()) + } + + #[getter] + fn operation(&self) -> PyDiffOperation { + self.operation.clone() + } + + fn __repr__(&self) -> String { + format!( + "KvDiff(key={:?}, operation={})", + String::from_utf8_lossy(&self.key), + self.operation.__repr__() + ) + } +} + #[pyclass(name = "VersionedKvStore")] struct PyVersionedKvStore { inner: Arc>>, @@ -1209,6 +1299,69 @@ impl PyVersionedKvStore { Ok(py_pairs) } + + /// Compare two commits or branches and return all keys that are added, updated or deleted + /// + /// Args: + /// from_ref: Reference (branch or commit) to compare from + /// to_ref: Reference (branch or commit) to compare to + /// + /// Returns: + /// List[KvDiff]: List of differences between the two references + fn diff(&self, from_ref: String, to_ref: String) -> PyResult> { + let store = self.inner.lock().unwrap(); + + let diffs = store + .diff(&from_ref, &to_ref) + .map_err(|e| PyValueError::new_err(format!("Failed to compute diff: {}", e)))?; + + let py_diffs: Vec = diffs + .into_iter() + .map(|diff| { + let operation = match diff.operation { + DiffOperation::Added(value) => PyDiffOperation { + operation_type: "Added".to_string(), + value: Some(value), + old_value: None, + new_value: None, + }, + DiffOperation::Removed(value) => PyDiffOperation { + operation_type: "Removed".to_string(), + value: Some(value), + old_value: None, + new_value: None, + }, + DiffOperation::Modified { old, new } => PyDiffOperation { + operation_type: "Modified".to_string(), + value: None, + old_value: Some(old), + new_value: Some(new), + }, + }; + + PyKvDiff { + key: diff.key, + operation, + } + }) + .collect(); + + Ok(py_diffs) + } + + /// Get the current commit's object ID + /// + /// Returns: + /// str: The hexadecimal string representation of the current commit ID + fn current_commit(&self) -> PyResult { + let store = self.inner.lock().unwrap(); + + let commit_id = store + .current_commit() + .map_err(|e| PyValueError::new_err(format!("Failed to get current commit: {}", e)))?; + + Ok(commit_id.to_hex().to_string()) + } } #[cfg(feature = "git")] @@ -1852,6 +2005,8 @@ fn prollytree(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; #[cfg(feature = "git")] m.add_class::()?;