diff --git a/python/README.md b/python/README.md index 8ab82ac..9858ad8 100644 --- a/python/README.md +++ b/python/README.md @@ -39,7 +39,7 @@ The full documentation includes: - **🌳 Probabilistic Trees** - High-performance data storage with automatic balancing - **🤖 AI Agent Memory** - Multi-layered memory systems for intelligent agents - **📚 Versioned Storage** - Git-like version control for key-value data -- **🔐 Cryptographic Verification** - Merkle proofs for data integrity +- **🔐 Cryptographic Verification** - Merkle proofs for data integrity across trees and versioned storage - **⚡ SQL Queries** - Query your data using SQL syntax ## 🔥 Key Use Cases @@ -84,6 +84,10 @@ commit_id = store.commit("Add production config") store.create_branch("experiment") store.insert(b"feature", b"experimental_data") store.commit("Add experimental feature") + +# Cryptographic verification on versioned data +proof = store.generate_proof(b"config") +is_valid = store.verify_proof(proof, b"config", b"production_settings") ``` ### SQL Queries diff --git a/src/git/versioned_store.rs b/src/git/versioned_store.rs index 026cfcb..6ad07d8 100644 --- a/src/git/versioned_store.rs +++ b/src/git/versioned_store.rs @@ -605,6 +605,110 @@ where Ok(self.git_repo.path().join(staging_filename)) } + + /// Generate a cryptographic proof for a key's existence and value in the tree + /// This proof can be used to verify the integrity of the key-value pair without + /// requiring access to the entire tree structure. + /// + /// # Parameters + /// - `key`: The key to generate proof for + /// + /// # Returns + /// - A proof object containing the hash path from root to the target node + pub fn generate_proof(&self, key: &[u8]) -> crate::proof::Proof { + self.tree.generate_proof(key) + } + + /// Verify a cryptographic proof for a key-value pair + /// This checks that the proof is valid and optionally verifies the expected value + /// + /// # Parameters + /// - `proof`: The proof to verify + /// - `key`: The key that the proof claims to prove + /// - `expected_value`: Optional expected value to verify against + /// + /// # Returns + /// - `true` if the proof is valid, `false` otherwise + pub fn verify( + &self, + proof: crate::proof::Proof, + key: &[u8], + expected_value: Option<&[u8]>, + ) -> bool { + self.tree.verify(proof, key, expected_value) + } +} + +#[cfg(test)] +mod proof_tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_versioned_store_proof_methods() { + // Create a temporary directory for the test + let temp_dir = TempDir::new().expect("Failed to create temp dir"); + let repo_path = temp_dir.path().to_str().unwrap(); + + // Initialize git repo + std::process::Command::new("git") + .args(["init"]) + .current_dir(repo_path) + .output() + .expect("Failed to initialize git repo"); + + // Set git config + std::process::Command::new("git") + .args(["config", "user.name", "Test User"]) + .current_dir(repo_path) + .output() + .expect("Failed to set git user name"); + + std::process::Command::new("git") + .args(["config", "user.email", "test@example.com"]) + .current_dir(repo_path) + .output() + .expect("Failed to set git user email"); + + // Create a subdirectory for the dataset (git-prolly requires this) + let dataset_path = temp_dir.path().join("dataset"); + std::fs::create_dir(&dataset_path).expect("Failed to create dataset directory"); + + // Change to the dataset subdirectory + let original_dir = std::env::current_dir().expect("Failed to get current dir"); + std::env::set_current_dir(&dataset_path).expect("Failed to change directory"); + + // Initialize the versioned store from the dataset subdirectory + let mut store = + GitVersionedKvStore::<32>::init(&dataset_path).expect("Failed to initialize store"); + + // Insert test data + let key = b"proof_test_key".to_vec(); + let value = b"proof_test_value".to_vec(); + + store + .insert(key.clone(), value.clone()) + .expect("Failed to insert"); + store + .commit("Add test data for proof") + .expect("Failed to commit"); + + // Test generate_proof method exists and works + let proof = store.generate_proof(&key); + + // Test verify method with correct value + assert!(store.verify(proof.clone(), &key, Some(&value))); + + // Test verify method for existence only + assert!(store.verify(proof.clone(), &key, None)); + + // Test verify with wrong value should fail + let wrong_value = b"wrong_value".to_vec(); + assert!(!store.verify(proof.clone(), &key, Some(&wrong_value))); + + // Restore original directory + std::env::set_current_dir(original_dir).expect("Failed to restore directory"); + } } // Generic diff functionality for all storage types diff --git a/src/python.rs b/src/python.rs index b0d04f3..07f85d6 100644 --- a/src/python.rs +++ b/src/python.rs @@ -1123,6 +1123,42 @@ impl PyVersionedKvStore { let store = self.inner.lock().unwrap(); Ok(store.storage_backend().clone().into()) } + + fn generate_proof(&self, py: Python, key: &Bound<'_, PyBytes>) -> PyResult> { + let key_vec = key.as_bytes().to_vec(); + + let proof_bytes = py.allow_threads(|| { + let store = self.inner.lock().unwrap(); + let proof = store.generate_proof(&key_vec); + + bincode::serialize(&proof) + .map_err(|e| PyValueError::new_err(format!("Proof serialization failed: {}", e))) + })?; + + Ok(PyBytes::new_bound(py, &proof_bytes).into()) + } + + #[pyo3(signature = (proof_bytes, key, expected_value=None))] + fn verify_proof( + &self, + py: Python, + proof_bytes: &Bound<'_, PyBytes>, + key: &Bound<'_, PyBytes>, + expected_value: Option<&Bound<'_, PyBytes>>, + ) -> PyResult { + let key_vec = key.as_bytes().to_vec(); + let proof_vec = proof_bytes.as_bytes().to_vec(); + let value_option = expected_value.map(|v| v.as_bytes().to_vec()); + + py.allow_threads(|| { + let proof: crate::proof::Proof<32> = bincode::deserialize(&proof_vec).map_err(|e| { + PyValueError::new_err(format!("Proof deserialization failed: {}", e)) + })?; + + let store = self.inner.lock().unwrap(); + Ok(store.verify(proof, &key_vec, value_option.as_deref())) + }) + } } #[cfg(feature = "git")]