Skip to content

Commit

Permalink
Merge pull request #269 from genedna/main
Browse files Browse the repository at this point in the history
Refactoring the git internal #238
  • Loading branch information
genedna committed Nov 28, 2023
2 parents 434d606 + 0b0bcd2 commit 79232b9
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 34 deletions.
7 changes: 6 additions & 1 deletion mercury/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ edition = "2021"

[dependencies]
common = { path = "../common"}
git = { path = "../git" }
serde = {version = "1.0.193", features = ["derive"]}
bstr = "1.8.0"
colored = "2.0.4"
sha1_smol = "1.0.0"
hex = "0.4.3"
thiserror = "1.0.50"
73 changes: 73 additions & 0 deletions mercury/src/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
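//! Error types for the `mercury` crate.
//!
//! [`GitError`] collects the error conditions raised for invalid Git objects, pack and idx
//! files, hash values, and encoding/decoding failures.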

use std::string::FromUtf8Error;

use thiserror::Error;

#[derive(Error, Debug)]
#[allow(unused)]
pub enum GitError {
#[error("The `{0}` is not a valid git object type.")]
InvalidObjectType(String),

#[error("The `{0}` is not a valid git blob object.")]
InvalidBlobObject(String),

#[error("The `{0}` is not a valid git tree object.")]
InvalidTreeObject(String),

#[error("The `{0}` is not a valid git tree item.")]
InvalidTreeItem(String),

#[error("`{0}`.")]
EmptyTreeItems(String),

#[error("The `{0}` is not a valid git commit signature.")]
InvalidSignatureType(String),

#[error("The `{0}` is not a valid git commit object.")]
InvalidCommitObject(String),

#[error("The `{0}` is not a valid git tag object.")]
InvalidTagObject(String),

#[error("The `{0}` is not a valid idx file.")]
InvalidIdxFile(String),

#[error("The `{0}` is not a valid pack file.")]
InvalidPackFile(String),

#[error("The `{0}` is not a valid pack header.")]
InvalidPackHeader(String),

#[error("The {0} is not a valid Hash value ")]
InvalidHashValue(String),

#[error("Delta Object Error Info:{0}")]
DeltaObjectError(String),

#[error("The object to be packed is incomplete ,{0}")]
UnCompletedPackObject(String),

#[error("Error decode in the Object ,info:{0}")]
InvalidObjectInfo(String),

#[error("Can't found Hash value :{0} from current file")]
NotFountHashValue(String),

#[error("Can't encode the object which id [{0}] to bytes")]
EncodeObjectError(String),

#[error("UTF-8 conversion error: {0}")]
ConversionError(String),
}

impl From<FromUtf8Error> for GitError {
fn from(err: FromUtf8Error) -> Self {
// convert the FromUtf8Error to GitError and return it
GitError::ConversionError(err.to_string())
}
}
149 changes: 149 additions & 0 deletions mercury/src/hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
//! In Git, the SHA-1 hash algorithm is widely used to generate unique identifiers for Git objects.
//! Each Git object corresponds to a unique SHA-1 hash value, which is used to identify the object's
//! location in the Git database.
//!

use std::fmt::Display;

use colored::Colorize;
use sha1_smol::Digest;
use serde::{Deserialize, Serialize};

/// A SHA-1 value stored as its 20 raw bytes (`[u8; 20]`).
///
/// Git identifies every object by the SHA-1 hash of its content; that hash is usually
/// written as a 40-character hexadecimal string, which is the hex encoding of these
/// 20 bytes. Wrapping the array in a dedicated type keeps hash IDs distinct from
/// arbitrary byte buffers and makes code that passes them around easier to read.
///
/// The type is `Copy`, and the derived `Default` value is the all-zero hash.
#[allow(unused)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default,Deserialize, Serialize)]
pub struct SHA1(pub [u8; 20]);

/// Colored, human-facing rendering of a [`SHA1`].
impl Display for SHA1 {
    /// Writes the 40-character hex form of the hash, styled bold red via `colored`.
    ///
    /// # Attention
    /// Because of the styling, the output of `to_string()` may contain terminal color
    /// escape sequences around the hex digits. When the bare hex string is needed
    /// (comparison, storage, file names), call `to_plain_str()` instead.
    ///
    /// # Example
    /// For the hash `18fd2deaaf152c7f1222c52fb2673f6192b375f0`, `to_plain_str()` yields
    /// exactly that text, while this impl yields the same digits wrapped in the
    /// bold/red styling sequences.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let hex_text = self.to_plain_str();
        let styled = hex_text.red().bold();
        write!(f, "{}", styled)
    }
}

impl std::str::FromStr for SHA1 {
type Err = &'static str;

/// Create Hash from a string, which is a 40-character hexadecimal string already calculated
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut h = SHA1::default();

let d = Digest::from_str(s);

match d {
Ok(d) => h.0.copy_from_slice(d.bytes().as_slice()),
Err(_e) => return Err("Hash from string encounter error"),
}

Ok(h)
}
}

impl SHA1 {
    /// Calculates the SHA-1 hash of `data` and wraps the 20-byte digest.
    ///
    /// Accepts any byte slice; existing callers passing `&Vec<u8>` keep working through
    /// deref coercion.
    pub fn new(data: &[u8]) -> SHA1 {
        // Hash the bytes and take the digest as a `[u8; 20]`.
        SHA1(sha1_smol::Sha1::from(data).digest().bytes())
    }

    /// Creates a hash from the raw bytes of an already-computed SHA-1 value.
    ///
    /// # Panics
    /// Panics if `bytes.len()` is not exactly 20, because `copy_from_slice` requires
    /// source and destination to have the same length.
    pub fn from_bytes(bytes: &[u8]) -> SHA1 {
        let mut h = SHA1::default();
        h.0.copy_from_slice(bytes);

        h
    }

    /// Exports the hash as a plain lowercase hex `String`, without any color styling.
    pub fn to_plain_str(self) -> String {
        hex::encode(self.0)
    }

    /// Exports the hash as a freshly allocated `Vec<u8>` holding its 20 raw bytes.
    pub fn to_data(self) -> Vec<u8> {
        self.0.to_vec()
    }
}

#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::hash::SHA1;

    /// Hex form and raw bytes of the same sample hash, shared by the tests below.
    const SAMPLE_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";
    const SAMPLE_BYTES: [u8; 20] = [
        0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
        0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
    ];

    /// `SHA1::new` must actually hash its input: check the standard SHA-1 test vectors.
    #[test]
    fn test_hash_new() {
        let abc = SHA1::new(&b"abc".to_vec());
        assert_eq!(
            abc.to_plain_str(),
            "a9993e364706816aba3e25717850c26c9cd0d89d"
        );

        let empty = SHA1::new(&Vec::new());
        assert_eq!(
            empty.to_plain_str(),
            "da39a3ee5e6b4b0d3255bfef95601890afd80709"
        );
    }

    /// Raw bytes are stored verbatim and hex-encode to the expected string.
    #[test]
    fn test_hash_from_bytes() {
        let hash = SHA1::from_bytes(&SAMPLE_BYTES);
        assert_eq!(hash.to_plain_str(), SAMPLE_HEX);
    }

    /// Parsing a valid hex string must succeed (a parse failure fails the test) and
    /// round-trip back to the same text.
    #[test]
    fn test_hash_from_str() {
        let hash = SHA1::from_str(SAMPLE_HEX).expect("valid 40-char hex must parse");
        assert_eq!(hash.to_plain_str(), SAMPLE_HEX);
    }

    /// Input that is not a 40-character hex string is rejected.
    #[test]
    fn test_hash_from_str_invalid() {
        assert!(SHA1::from_str("not-a-hash").is_err());
    }

    /// `to_data` returns the 20 raw bytes of the parsed hash.
    #[test]
    fn test_hash_to_data() {
        let hash = SHA1::from_str(SAMPLE_HEX).expect("valid 40-char hex must parse");
        assert_eq!(hash.to_data(), SAMPLE_BYTES.to_vec());
    }
}
1 change: 1 addition & 0 deletions mercury/src/internal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
//!


pub mod object;
pub mod pack;
31 changes: 3 additions & 28 deletions mercury/src/internal/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,9 @@
//!
//!
//!
use std::io::{BufRead, Read};
use std::io::{Read, Write};
use std::fmt::Display;

/// The [`ObjectT`] Trait is for the Blob、Commit、Tree and Tag Structs , which are four common object
/// of Git object . In that case, the four kinds of object can be store in same `Arc<dyn ObjectT>`.
///
/// This trait receive a "Reader" to generate the target object. We now have two kind of "Reader":
///
/// 1. ReadBoxed. Input the zlib stream of four kinds of objects data stream. The Object should be the
/// base objects ,that is ,"Blob、Commit、Tree and Tag". After read, Output Object will auto compute hash
/// value while call the "read" method.
/// 2. DeltaReader. To deal with the DELTA object store in the pack file,including the Ref Delta Object
/// and the Offset Delta Object. Its' input "read" is always the `ReadBoxed`, cause the delta data is also
/// the zlib stream, which should also be unzip.
pub trait ObjectT: Send + Sync + Display {
/// Generate a new Object from a `ReadBoxed<BufRead>`.
/// the input size,is only for new a vec with directive space allocation
/// the Input data stream and Output object should be plain base object.
fn new_from_read<R: BufRead>(read: &mut ReadBoxed<R>, size: usize) -> Self
where
Self: Sized,
{
let mut content: Vec<u8> = Vec::with_capacity(size);
read.read_to_end(&mut content).unwrap();
let h = read.hash.clone();
let hash_str = h.finalize();
let mut result = Self::new_from_data(content);
result.set_hash(Hash::new_from_str(&format!("{:x}", hash_str)));
pub trait ObjectTrait: Read + Write + Send + Sync + Display {

result
}
}
10 changes: 5 additions & 5 deletions mercury/src/internal/pack/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@ use std::io::Read;

use common::utils;

use git::hash::Hash;
use git::internal::object::ObjectT;
use git::errors::GitError;
use crate::hash::SHA1;
use crate::internal::object::ObjectTrait;
use crate::errors::GitError;

use crate::cache::Cache;

#[allow(unused)]
pub struct Pack {
pub number: usize,
pub signature: Hash,
pub objects: Box<dyn Cache<T = Arc<dyn ObjectT>>>,
pub signature: SHA1,
pub objects: Box<dyn Cache<T = Arc<dyn ObjectTrait>>>,
}

impl Pack {
Expand Down
3 changes: 3 additions & 0 deletions mercury/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@


pub mod cache;
pub mod internal;
pub mod hash;
pub mod errors;

#[cfg(test)]
mod tests {
Expand Down

0 comments on commit 79232b9

Please sign in to comment.