Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ The full documentation includes:
- **🌳 Probabilistic Trees** - High-performance data storage with automatic balancing
- **🤖 AI Agent Memory** - Multi-layered memory systems for intelligent agents
- **📚 Versioned Storage** - Git-like version control for key-value data
- **🔐 Cryptographic Verification** - Merkle proofs for data integrity
- **🔐 Cryptographic Verification** - Merkle proofs for data integrity across trees and versioned storage
- **⚡ SQL Queries** - Query your data using SQL syntax

## 🔥 Key Use Cases
Expand Down Expand Up @@ -84,6 +84,10 @@ commit_id = store.commit("Add production config")
store.create_branch("experiment")
store.insert(b"feature", b"experimental_data")
store.commit("Add experimental feature")

# Cryptographic verification on versioned data
proof = store.generate_proof(b"config")
is_valid = store.verify_proof(proof, b"config", b"production_settings")
```

### SQL Queries
Expand Down
115 changes: 115 additions & 0 deletions python/prollytree/prollytree.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,35 @@ class StorageBackend:

def __str__(self) -> str: ...

class MergeConflict:
    """Describes one conflicting key encountered while merging branches."""

    @property
    def key(self) -> bytes:
        """Key on which the branches disagree."""
        ...

    @property
    def base_value(self) -> Optional[bytes]:
        """Value stored for the key in the common base commit."""
        ...

    @property
    def source_value(self) -> Optional[bytes]:
        """Value stored for the key in the source branch."""
        ...

    @property
    def destination_value(self) -> Optional[bytes]:
        """Value stored for the key in the destination branch."""
        ...

class ConflictResolution:
    """Enum-like set of strategies for resolving merge conflicts."""

    # Each member is itself a ConflictResolution value.
    # NOTE(review): the exact semantics of each strategy are defined by the
    # native implementation, not by this stub -- confirm there.
    IgnoreAll: "ConflictResolution"
    TakeSource: "ConflictResolution"
    TakeDestination: "ConflictResolution"

class VersionedKvStore:
"""A versioned key-value store backed by Git and ProllyTree"""

Expand Down Expand Up @@ -442,6 +471,61 @@ class VersionedKvStore:
"""
...

def get_commits_for_key(self, key: bytes) -> List[Dict[str, Union[str, int]]]:
    """
    List the commits in which the given key was changed.

    Args:
        key: The key whose history is requested

    Returns:
        One dictionary per matching commit, carrying the commit's id,
        author, committer, message, and timestamp
    """
    ...

def get_commit_history(self) -> List[Dict[str, Union[str, int]]]:
    """
    Return the repository's commit history.

    Returns:
        One dictionary per commit, carrying the commit's id, author,
        committer, message, and timestamp
    """
    ...

def merge(
    self,
    source_branch: str,
    conflict_resolution: Optional[ConflictResolution] = None,
) -> str:
    """
    Merge the named branch into the branch that is currently checked out.

    Args:
        source_branch: Branch whose changes are merged in
        conflict_resolution: How conflicting keys are resolved
            (default: IgnoreAll)

    Returns:
        Commit ID of the resulting merge commit

    Raises:
        ValueError: If the merge fails or conflicts remain unresolved
    """
    ...

def try_merge(self, source_branch: str) -> Tuple[bool, List[MergeConflict]]:
    """
    Try to merge the named branch, reporting conflicts instead of raising.

    Args:
        source_branch: Branch whose changes are merged in

    Returns:
        A ``(success, conflicts)`` tuple:
            - success: True when the merge succeeded, False when
              conflicts were found
            - conflicts: The MergeConflict objects found when success
              is False
    """
    ...

def storage_backend(self) -> StorageBackend:
"""
Get the current storage backend type.
Expand All @@ -450,3 +534,34 @@ class VersionedKvStore:
Storage backend enum value
"""
...

def generate_proof(self, key: bytes) -> bytes:
    """
    Build a cryptographic proof of a key's existence and value in the
    versioned store.

    Args:
        key: The key the proof is generated for

    Returns:
        The proof in serialized form, as bytes
    """
    ...

def verify_proof(
    self,
    proof_bytes: bytes,
    key: bytes,
    expected_value: Optional[bytes] = None,
) -> bool:
    """
    Verify a cryptographic proof for a key-value pair in the versioned store.

    The first parameter is named ``proof_bytes`` to match the keyword the
    native extension actually accepts; positional callers are unaffected.

    Args:
        proof_bytes: The serialized proof to verify (as returned by
            ``generate_proof``)
        key: The key that the proof claims to prove
        expected_value: Optional expected value to verify against

    Returns:
        True if the proof is valid, False otherwise

    Raises:
        ValueError: If ``proof_bytes`` cannot be deserialized into a proof
    """
    ...
104 changes: 104 additions & 0 deletions src/git/versioned_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,110 @@ where

Ok(self.git_repo.path().join(staging_filename))
}

/// Build a cryptographic proof for `key` against the current tree.
///
/// The proof lets a verifier check the integrity of the key-value pair
/// without access to the entire tree structure. This method simply
/// delegates to the underlying tree's `generate_proof`.
///
/// # Parameters
/// - `key`: The key to generate a proof for
///
/// # Returns
/// - A proof object containing the hash path from root to the target node
pub fn generate_proof(&self, key: &[u8]) -> crate::proof::Proof<N> {
    let tree = &self.tree;
    tree.generate_proof(key)
}

/// Check a cryptographic proof for a key-value pair.
///
/// Delegates to the underlying tree's `verify`, which validates the proof
/// and, when `expected_value` is supplied, also checks the value.
///
/// # Parameters
/// - `proof`: The proof to verify (consumed by the call)
/// - `key`: The key that the proof claims to prove
/// - `expected_value`: Optional expected value to verify against
///
/// # Returns
/// - `true` if the proof is valid, `false` otherwise
pub fn verify(
    &self,
    proof: crate::proof::Proof<N>,
    key: &[u8],
    expected_value: Option<&[u8]>,
) -> bool {
    let tree = &self.tree;
    tree.verify(proof, key, expected_value)
}
}

#[cfg(test)]
mod proof_tests {
    use super::*;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Switches the process working directory and restores the previous one
    /// when dropped.
    ///
    /// The working directory is process-wide state. Restoring it from `Drop`
    /// means a failing assertion cannot leave the process sitting inside a
    /// temp directory that is about to be deleted.
    struct CwdGuard {
        original: PathBuf,
    }

    impl CwdGuard {
        /// Remember the current directory, then change into `target`.
        fn enter(target: &Path) -> Self {
            let original = std::env::current_dir().expect("Failed to get current dir");
            std::env::set_current_dir(target).expect("Failed to change directory");
            Self { original }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Best effort only: panicking here while already unwinding from a
            // failed assertion would abort the whole test process.
            let _ = std::env::set_current_dir(&self.original);
        }
    }

    /// Run `git <args>` inside `repo` and fail the test if git cannot be
    /// spawned or exits with a non-zero status.
    fn run_git(repo: &Path, args: &[&str]) {
        let output = std::process::Command::new("git")
            .args(args)
            .current_dir(repo)
            .output()
            .expect("Failed to run git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Exercises `generate_proof` and `verify` on a freshly initialised store:
    /// a proof for an inserted key must verify with the right value, verify
    /// for existence only, and be rejected for a wrong value.
    #[test]
    fn test_versioned_store_proof_methods() {
        // Declared first so it is dropped last, after the cwd has been restored.
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let repo_path = temp_dir.path();

        // Initialize the git repo and give it an identity for commits.
        run_git(repo_path, &["init"]);
        run_git(repo_path, &["config", "user.name", "Test User"]);
        run_git(repo_path, &["config", "user.email", "test@example.com"]);

        // Create a subdirectory for the dataset (git-prolly requires this)
        let dataset_path = repo_path.join("dataset");
        std::fs::create_dir(&dataset_path).expect("Failed to create dataset directory");

        // Work from the dataset subdirectory; the guard restores the original
        // directory on every exit path, including assertion failures.
        let _cwd = CwdGuard::enter(&dataset_path);

        // Initialize the versioned store from the dataset subdirectory
        let mut store =
            GitVersionedKvStore::<32>::init(&dataset_path).expect("Failed to initialize store");

        // Insert test data
        let key = b"proof_test_key".to_vec();
        let value = b"proof_test_value".to_vec();

        store
            .insert(key.clone(), value.clone())
            .expect("Failed to insert");
        store
            .commit("Add test data for proof")
            .expect("Failed to commit");

        let proof = store.generate_proof(&key);

        // Correct value must verify.
        assert!(store.verify(proof.clone(), &key, Some(&value)));

        // Existence-only check (no expected value) must verify.
        assert!(store.verify(proof.clone(), &key, None));

        // A mismatching value must be rejected.
        let wrong_value = b"wrong_value".to_vec();
        assert!(!store.verify(proof, &key, Some(&wrong_value)));
    }
}

// Generic diff functionality for all storage types
Expand Down
36 changes: 36 additions & 0 deletions src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,42 @@ impl PyVersionedKvStore {
let store = self.inner.lock().unwrap();
Ok(store.storage_backend().clone().into())
}

/// Generate a proof for `key` and return it serialized with bincode.
///
/// The key is copied out of the Python `bytes` object so the work can run
/// with the GIL released.
///
/// Raises `ValueError` if the proof cannot be serialized.
fn generate_proof(&self, py: Python, key: &Bound<'_, PyBytes>) -> PyResult<Py<PyBytes>> {
    let key_vec = key.as_bytes().to_vec();

    let proof_bytes = py.allow_threads(|| {
        // Hold the store lock only while reading the tree; serialization
        // works on the owned proof and does not need the store.
        let proof = {
            let store = self.inner.lock().unwrap();
            store.generate_proof(&key_vec)
        };

        bincode::serialize(&proof)
            .map_err(|e| PyValueError::new_err(format!("Proof serialization failed: {}", e)))
    })?;

    Ok(PyBytes::new_bound(py, &proof_bytes).into())
}

// Verify a bincode-serialized proof for `key`, optionally checking the value.
// Returns Ok(true)/Ok(false) for a decodable proof and a ValueError when the
// bytes cannot be deserialized into a proof.
#[pyo3(signature = (proof_bytes, key, expected_value=None))]
fn verify_proof(
    &self,
    py: Python,
    proof_bytes: &Bound<'_, PyBytes>,
    key: &Bound<'_, PyBytes>,
    expected_value: Option<&Bound<'_, PyBytes>>,
) -> PyResult<bool> {
    // Copy everything out of the Python-owned buffers before the GIL is released.
    let raw_proof = proof_bytes.as_bytes().to_vec();
    let raw_key = key.as_bytes().to_vec();
    let raw_expected = expected_value.map(|bytes| bytes.as_bytes().to_vec());

    py.allow_threads(|| {
        // Decode first so a malformed proof is rejected without taking the lock.
        let decoded: Result<crate::proof::Proof<32>, _> = bincode::deserialize(&raw_proof);
        let proof = match decoded {
            Ok(proof) => proof,
            Err(e) => {
                return Err(PyValueError::new_err(format!(
                    "Proof deserialization failed: {}",
                    e
                )));
            }
        };

        let store = self.inner.lock().unwrap();
        let is_valid = store.verify(proof, &raw_key, raw_expected.as_deref());
        Ok(is_valid)
    })
}
}

#[cfg(feature = "git")]
Expand Down