8 changes: 5 additions & 3 deletions src/digest.rs
@@ -130,9 +130,11 @@ impl<'de, const N: usize> Deserialize<'de> for ValueDigest<N> {
     where
         D: serde::Deserializer<'de>,
     {
-        let bytes: &[u8] = serde::de::Deserialize::deserialize(deserializer)?;
-        let array = <[u8; N]>::try_from(bytes)
-            .map_err(|_| serde::de::Error::invalid_length(bytes.len(), &stringify!(N)))?;
+        // Try to deserialize as a sequence of bytes (for JSON format)
+        let bytes: Vec<u8> = serde::de::Deserialize::deserialize(deserializer)?;
+        let array = <[u8; N]>::try_from(bytes.as_slice()).map_err(|_| {
+            serde::de::Error::invalid_length(bytes.len(), &format!("array of length {N}").as_str())
+        })?;
         Ok(ValueDigest(array))
     }
 }
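For context, a minimal sketch (not part of the PR; assumes `serde_json` as a dev-dependency) of why the owned `Vec<u8>` matters: JSON has no byte type, so a digest is encoded as an array of numbers, which cannot be borrowed as `&[u8]` but deserializes fine into an owned `Vec<u8>`:

```rust
fn main() {
    let json = "[222, 173, 190, 239]"; // how serde_json renders 4 digest bytes

    // Borrowed bytes fail: a JSON number array cannot be viewed as &[u8].
    assert!(serde_json::from_str::<&[u8]>(json).is_err());

    // Owned bytes succeed, then convert to the fixed-size array as the PR does.
    let owned: Vec<u8> = serde_json::from_str(json).unwrap();
    let array = <[u8; 4]>::try_from(owned.as_slice()).unwrap();
    assert_eq!(array, [0xde, 0xad, 0xbe, 0xef]);
}
```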
14 changes: 12 additions & 2 deletions src/git/operations.rs
@@ -397,14 +397,24 @@ mod tests {
     #[test]
     fn test_git_operations_creation() {
         let temp_dir = TempDir::new().unwrap();
-        let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap();
+        // Initialize git repository (regular, not bare)
+        gix::init(temp_dir.path()).unwrap();
+        // Create subdirectory for dataset
+        let dataset_dir = temp_dir.path().join("dataset");
+        std::fs::create_dir_all(&dataset_dir).unwrap();
+        let store = VersionedKvStore::<32>::init(&dataset_dir).unwrap();
         let _ops = GitOperations::new(store);
     }

     #[test]
     fn test_parse_commit_id() {
         let temp_dir = TempDir::new().unwrap();
-        let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap();
+        // Initialize git repository (regular, not bare)
+        gix::init(temp_dir.path()).unwrap();
+        // Create subdirectory for dataset
+        let dataset_dir = temp_dir.path().join("dataset");
+        std::fs::create_dir_all(&dataset_dir).unwrap();
+        let store = VersionedKvStore::<32>::init(&dataset_dir).unwrap();
         let ops = GitOperations::new(store);

         // Test HEAD parsing
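Both tests now share the same setup steps; a hypothetical helper (not in the PR, the name is illustrative) that captures the new pattern:

```rust
use tempfile::TempDir;

/// Hypothetical test helper: a regular (non-bare) git repository at the temp
/// root, with the dataset living in a subdirectory of the work tree.
fn setup_dataset() -> (TempDir, std::path::PathBuf) {
    let temp_dir = TempDir::new().unwrap();
    gix::init(temp_dir.path()).unwrap(); // regular, not bare
    let dataset_dir = temp_dir.path().join("dataset");
    std::fs::create_dir_all(&dataset_dir).unwrap();
    (temp_dir, dataset_dir) // return TempDir so the directory outlives the test
}
```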
146 changes: 133 additions & 13 deletions src/git/storage.rs
@@ -33,34 +33,69 @@ pub struct GitNodeStorage<const N: usize> {
     configs: Mutex<HashMap<String, Vec<u8>>>,
     // Maps ProllyTree hashes to Git object IDs
    hash_to_object_id: Mutex<HashMap<ValueDigest<N>, gix::ObjectId>>,
+    // Directory where this dataset's config and mapping files are stored
+    dataset_dir: std::path::PathBuf,
 }

+impl<const N: usize> Clone for GitNodeStorage<N> {
+    fn clone(&self) -> Self {
+        let cloned = Self {
+            _repository: self._repository.clone(),
+            cache: Mutex::new(LruCache::new(NonZeroUsize::new(1000).unwrap())),
+            configs: Mutex::new(HashMap::new()),
+            hash_to_object_id: Mutex::new(HashMap::new()),
+            dataset_dir: self.dataset_dir.clone(),
+        };
+
+        // Load the hash mappings for the cloned instance
+        cloned.load_hash_mappings();
+
+        cloned
+    }
+}
+
 impl<const N: usize> GitNodeStorage<N> {
     /// Create a new GitNodeStorage instance
-    pub fn new(repository: gix::Repository) -> Result<Self, GitKvError> {
+    pub fn new(
+        repository: gix::Repository,
+        dataset_dir: std::path::PathBuf,
+    ) -> Result<Self, GitKvError> {
         let cache_size = NonZeroUsize::new(1000).unwrap(); // Default cache size

-        Ok(GitNodeStorage {
+        let storage = GitNodeStorage {
             _repository: Arc::new(Mutex::new(repository)),
             cache: Mutex::new(LruCache::new(cache_size)),
             configs: Mutex::new(HashMap::new()),
             hash_to_object_id: Mutex::new(HashMap::new()),
-        })
+            dataset_dir,
+        };
+
+        // Load existing hash mappings
+        storage.load_hash_mappings();
+
+        Ok(storage)
     }

     /// Create GitNodeStorage with custom cache size
     pub fn with_cache_size(
         repository: gix::Repository,
+        dataset_dir: std::path::PathBuf,
         cache_size: usize,
     ) -> Result<Self, GitKvError> {
         let cache_size = NonZeroUsize::new(cache_size).unwrap_or(NonZeroUsize::new(1000).unwrap());

-        Ok(GitNodeStorage {
+        let storage = GitNodeStorage {
             _repository: Arc::new(Mutex::new(repository)),
             cache: Mutex::new(LruCache::new(cache_size)),
             configs: Mutex::new(HashMap::new()),
             hash_to_object_id: Mutex::new(HashMap::new()),
-        })
+            dataset_dir,
+        };
+
+        // Load existing hash mappings
+        storage.load_hash_mappings();
+
+        Ok(storage)
     }

     /// Store a node as a Git blob
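A hedged usage sketch of the new constructor signatures (`repo_root` and the `dataset` subdirectory are assumptions; error handling elided with `unwrap`):

```rust
use std::path::Path;

fn open_storage(repo_root: &Path) {
    let dataset_dir = repo_root.join("dataset");

    // Two-argument constructor: dataset_dir is where the prolly_config_* and
    // prolly_hash_mappings files are persisted.
    let storage =
        GitNodeStorage::<32>::new(gix::open(repo_root).unwrap(), dataset_dir.clone()).unwrap();

    // Clone rebuilds its caches from the persisted mapping file instead of
    // sharing the Mutex-guarded state.
    let _copy = storage.clone();

    // Custom LRU capacity; a zero capacity falls back to the default of 1000.
    let _small =
        GitNodeStorage::<32>::with_cache_size(gix::open(repo_root).unwrap(), dataset_dir, 100)
            .unwrap();
}
```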
@@ -118,7 +153,14 @@ impl<const N: usize> NodeStorage<N> for GitNodeStorage<N> {
         match self.store_node_as_blob(&node) {
             Ok(blob_id) => {
                 // Store the mapping between ProllyTree hash and Git object ID
-                self.hash_to_object_id.lock().unwrap().insert(hash, blob_id);
+                self.hash_to_object_id
+                    .lock()
+                    .unwrap()
+                    .insert(hash.clone(), blob_id);
+
+                // Persist the mapping to filesystem
+                self.save_hash_mapping(&hash, &blob_id);
+
                 Some(())
             }
             Err(_) => None,
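The `hash.clone()` is needed because `insert` moves the hash into the map while `save_hash_mapping` still borrows it afterwards; a minimal stand-in illustration (plain `HashMap`, not the PR's types):

```rust
use std::collections::HashMap;

fn persist(_hash: &[u8], _id: u32) {} // stand-in for save_hash_mapping

fn main() {
    let mut map: HashMap<Vec<u8>, u32> = HashMap::new();
    let hash = vec![1u8, 2, 3];

    // `map.insert(hash, 42)` would move `hash` and break the call below;
    // inserting a clone keeps the original usable.
    map.insert(hash.clone(), 42);
    persist(&hash, 42);
}
```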
@@ -139,14 +181,90 @@
     }

     fn save_config(&self, key: &str, config: &[u8]) {
-        // Store config in memory for now
-        // In a real implementation, we'd store this as a Git blob or in a config file
+        // Store config in memory
         let mut configs = self.configs.lock().unwrap();
         configs.insert(key.to_string(), config.to_vec());
+
+        // Also persist to filesystem for durability in the dataset directory
+        let config_path = self.dataset_dir.join(format!("prolly_config_{key}"));
+        let _ = std::fs::write(config_path, config);
     }

     fn get_config(&self, key: &str) -> Option<Vec<u8>> {
-        self.configs.lock().unwrap().get(key).cloned()
+        // First try to get from memory
+        if let Some(config) = self.configs.lock().unwrap().get(key).cloned() {
+            return Some(config);
+        }
+
+        // If not in memory, try to load from filesystem
+        let config_path = self.dataset_dir.join(format!("prolly_config_{key}"));
+        if let Ok(config) = std::fs::read(config_path) {
+            // Cache in memory for future use
+            self.configs
+                .lock()
+                .unwrap()
+                .insert(key.to_string(), config.clone());
+            return Some(config);
+        }
+
+        None
     }
 }

+impl<const N: usize> GitNodeStorage<N> {
+    /// Save hash mapping to filesystem
+    fn save_hash_mapping(&self, hash: &ValueDigest<N>, object_id: &gix::ObjectId) {
+        let mapping_path = self.dataset_dir.join("prolly_hash_mappings");
+
+        // Read existing mappings
+        let mut mappings = if mapping_path.exists() {
+            std::fs::read_to_string(&mapping_path).unwrap_or_default()
+        } else {
+            String::new()
+        };
+
+        // Add new mapping - use simple format for now without hex dependency
+        let hash_bytes: Vec<String> = hash.0.iter().map(|b| format!("{b:02x}")).collect();
+        let hash_hex = hash_bytes.join("");
+        let object_hex = object_id.to_hex().to_string();
+        mappings.push_str(&format!("{hash_hex}:{object_hex}\n"));
+
+        // Write back
+        let _ = std::fs::write(mapping_path, mappings);
+    }
+
+    /// Load hash mappings from filesystem
+    fn load_hash_mappings(&self) {
+        let mapping_path = self.dataset_dir.join("prolly_hash_mappings");
+
+        if let Ok(mappings) = std::fs::read_to_string(mapping_path) {
+            let mut hash_map = self.hash_to_object_id.lock().unwrap();
+
+            for line in mappings.lines() {
+                if let Some((hash_hex, object_hex)) = line.split_once(':') {
+                    // Parse hex string manually
+                    if hash_hex.len() == N * 2 {
+                        let mut hash_bytes = Vec::new();
+                        for i in 0..N {
+                            if let Ok(byte) = u8::from_str_radix(&hash_hex[i * 2..i * 2 + 2], 16) {
+                                hash_bytes.push(byte);
+                            } else {
+                                break;
+                            }
+                        }
+
+                        if hash_bytes.len() == N {
+                            if let Ok(object_id) = gix::ObjectId::from_hex(object_hex.as_bytes()) {
+                                let mut hash_array = [0u8; N];
+                                hash_array.copy_from_slice(&hash_bytes);
+                                let hash = ValueDigest(hash_array);
+                                hash_map.insert(hash, object_id);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}

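A standalone sketch of the `prolly_hash_mappings` on-disk format used above, one `<prolly_hash_hex>:<git_object_id_hex>` entry per line (helper names are illustrative; the PR encodes hex inline rather than via a `hex` crate):

```rust
/// Encode one mapping line: "<prolly_hash_hex>:<git_object_id_hex>\n".
fn encode_line(hash: &[u8], object_hex: &str) -> String {
    let hash_hex: String = hash.iter().map(|b| format!("{b:02x}")).collect();
    format!("{hash_hex}:{object_hex}\n")
}

/// Decode the hash half of a line back into N raw bytes.
fn decode_hash<const N: usize>(hash_hex: &str) -> Option<[u8; N]> {
    if hash_hex.len() != N * 2 {
        return None; // wrong digest width for this tree
    }
    let mut bytes = [0u8; N];
    for i in 0..N {
        bytes[i] = u8::from_str_radix(&hash_hex[i * 2..i * 2 + 2], 16).ok()?;
    }
    Some(bytes)
}

fn main() {
    let line = encode_line(&[0xab, 0xcd], "deadbeef");
    assert_eq!(line, "abcd:deadbeef\n");
    let (hash_hex, object_hex) = line.trim_end().split_once(':').unwrap();
    assert_eq!(decode_hash::<2>(hash_hex), Some([0xab, 0xcd]));
    assert_eq!(object_hex, "deadbeef");
}
```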
@@ -186,8 +304,8 @@ mod tests {

     #[test]
     fn test_git_node_storage_basic_operations() {
-        let (_temp_dir, repo) = create_test_repo();
-        let mut storage = GitNodeStorage::<32>::new(repo).unwrap();
+        let (temp_dir, repo) = create_test_repo();
+        let mut storage = GitNodeStorage::<32>::new(repo, temp_dir.path().to_path_buf()).unwrap();

         let node = create_test_node();
         let hash = node.get_hash();
@@ -210,8 +328,10 @@

     #[test]
     fn test_cache_functionality() {
-        let (_temp_dir, repo) = create_test_repo();
-        let mut storage = GitNodeStorage::<32>::with_cache_size(repo, 2).unwrap();
+        let (temp_dir, repo) = create_test_repo();
+        let dataset_dir = temp_dir.path().join("dataset");
+        std::fs::create_dir_all(&dataset_dir).unwrap();
+        let mut storage = GitNodeStorage::<32>::with_cache_size(repo, dataset_dir, 2).unwrap();

         let node1 = create_test_node();
         let hash1 = node1.get_hash();