In [1]:
import zlib
import hashlib
import os
from binascii import hexlify, unhexlify

In [2]:
def sha1_to_path(sha1, directory):
    """Return the path of the loose-object file for `sha1` under `directory`.

    The first two hex characters of the SHA-1 name a sub-directory of
    ``<directory>/objects``; the remaining characters name the file in it.
    """
    return '/'.join((directory, 'objects', sha1[:2], sha1[2:]))
def sha1_to_directory(sha1, directory):
    """Return the fan-out directory that holds the object file for `sha1`.

    Only the first two hex characters of the SHA-1 are used, so an
    abbreviated SHA-1 works as well as a full one.
    """
    return '/'.join((directory, 'objects', sha1[:2]))
def disambiguate_sha1(sha1, directory):
    """Expand an abbreviated SHA-1 to the full SHA-1 of the stored object.

    Args:
        sha1: Leading hex characters of an object's SHA-1.
        directory: Path of the git directory (the one containing ``objects``).

    Returns:
        The full hex SHA-1 of the single object whose name starts with `sha1`.

    Raises:
        Exception: If no stored object matches, or if more than one does.
    """
    prefix = sha1[:2]
    suffix = sha1[2:]
    try:
        candidates = os.listdir(sha1_to_directory(sha1, directory))
    except FileNotFoundError:
        # No fan-out directory for this prefix means no object can match;
        # report it the same way as an empty match list below.
        candidates = []
    matching_files = [file for file in candidates if file.startswith(suffix)]
    if len(matching_files) == 0:
        raise Exception('No object exists with that SHA1.')
    elif len(matching_files) > 1:
        raise Exception('Ambiguous SHA1 provided.')
    else:
        return prefix + matching_files[0]

In [3]:
def pretty_print(sha1, directory='.git'):
    """Read a loose object and return its contents as a list of strings.

    Args:
        sha1: Full 40-character hex SHA-1, or an unambiguous abbreviation.
        directory: Path of the git directory (the one containing ``objects``).

    Returns:
        For blobs, commits and tags: ``[header, body]``.
        For trees: ``[header, 'mode name', hex_sha1, 'mode name', hex_sha1, ...]``
        with one ``'mode name'`` / ``hex_sha1`` pair per entry.
        ``None`` for any other object type.
    """
    if len(sha1) < 40:
        sha1 = disambiguate_sha1(sha1, directory)
    path = sha1_to_path(sha1, directory)
    with open(path, mode='rb') as compressed:
        decompressed = zlib.decompress(compressed.read())
    # Only the FIRST NUL separates the header from the body. The body itself
    # may contain NUL bytes (binary blobs, raw SHA-1 digests inside trees),
    # so it must not be split on every NUL.
    header, _, body = decompressed.partition(b'\x00')
    object_type = header.split(b' ')[0]
    if object_type in (b'blob', b'commit', b'tag'):
        return [header.decode(), body.decode()]
    elif object_type == b'tree':
        entries = [header.decode()]
        position = 0
        while position < len(body):
            # Each entry is b'<mode> <name>\x00' followed by exactly 20 raw
            # digest bytes; the digest is sliced by length because it may
            # itself contain zero bytes.
            name_end = body.index(b'\x00', position)
            digest_end = name_end + 1 + 20
            entries.append(body[position:name_end].decode())
            entries.append(hexlify(body[name_end + 1:digest_end]).decode())
            position = digest_end
        return entries
    return None
# Inspect a tree object already stored in the repository: the result is the
# header followed by a "mode name" string and the entry's hex SHA-1.
pretty_print('8accee2c256b491ea0d9c138d0bbc94d064ba590')

['tree 37', '100644 README.md', 'ad0ec6bc18160c7aa60597510e9068a9db8e02b8']

In [4]:
# Round-trip check: rebuild the raw tree body from the pretty-printed entries
# and confirm its length matches the size recorded in the 'tree 37' header.
tree_object = pretty_print('8accee2c256b491ea0d9c138d0bbc94d064ba590')[1:]
tree_object[0] = tree_object[0].encode()  # "mode name" back to bytes
tree_object[1] = unhexlify(tree_object[1])  # hex digest back to raw bytes
len(b'\x00'.join(tree_object))

37

In [5]:
def store_blob(content, directory='.git'):
    """Write `content` to the object store as a blob and return its SHA-1.

    Args:
        content: Blob contents, either ``str`` (encoded as UTF-8) or ``bytes``.
        directory: Path of the git directory (the one containing ``objects``).

    Returns:
        The 40-character hex SHA-1 of the stored blob.
    """
    data = content if isinstance(content, bytes) else content.encode('utf8')
    # The size in the header is the length in BYTES, which differs from
    # len(str) as soon as the text contains non-ASCII characters.
    header = 'blob ' + str(len(data)) + '\x00'
    store = header.encode('utf8') + data
    sha1 = hashlib.sha1(store).hexdigest()
    path = sha1_to_path(sha1, directory)
    # Given how unlikely it is that there has been a hash collision, an
    # existing file is assumed to be the same blob written earlier.
    if not os.path.isfile(path):
        os.makedirs(sha1_to_directory(sha1, directory), exist_ok=True)
        with open(path, mode='wb') as file:
            file.write(zlib.compress(store))
    return sha1

In [6]:
# Store a small blob; the returned value is its SHA-1.
store_blob('hello world')

'95d09f2b10159347eece71399a7e2e907ea3df4f'

In [7]:
# SHA-1s shorter than 40 characters are expanded via disambiguate_sha1, so
# the two-character prefix is enough to read back the blob stored above.
pretty_print('95')

['blob 11', 'hello world']

In [8]:
# A tree with two entries: each entry contributes a "mode name" string
# followed by its hex SHA-1.
pretty_print('770c7bf919eed5444f4fae8df324536d77f2bdd4')

['tree 77',
 '100644 Duffer.ipynb',
 '997db51291f7490e756f7b3805d791a615e5d04f',
 '100644 README.md',
 'ad0ec6bc18160c7aa60597510e9068a9db8e02b8']

In [9]:
class GitObject:
    """Base class for storable objects.

    Subclasses must provide ``store`` (the serialized object as bytes) and
    ``content`` (used only for the repr).
    """

    @property
    def sha1(self):
        """Hex SHA-1 digest of ``self.store``."""
        return hashlib.sha1(self.store).hexdigest()

    def __repr__(self):
        return '{}({!r})'.format(self.__class__.__name__, self.content)

class NamedObject:
    """Mixin for objects that can appear as an entry inside a tree.

    Users of the mixin must provide ``mode`` (int), ``name`` (str) and
    ``sha1`` (40-character hex string).
    """

    @property
    def entry(self):
        """Serialized tree entry: ``b'<mode> <name>\\x00'`` + 20 raw SHA-1 bytes."""
        # git writes the mode as plain octal WITHOUT zero padding: a directory
        # is b'40000', not b'040000'. Padding it changes the bytes of the tree
        # and therefore its SHA-1.
        mode_bytestring = '{:o}'.format(self.mode).encode()
        return mode_bytestring + b' ' + self.name.encode() + b'\x00' + unhexlify(self.sha1)

In [10]:
class Blob(GitObject):
    """A blob object: file contents without any name or mode."""

    def __init__(self, content):
        # `content` may be str (serialized as UTF-8) or bytes.
        self.content = content

    @property
    def store(self):
        """Serialized blob: ``b'blob <size>\\x00'`` followed by the contents."""
        if isinstance(self.content, bytes):
            data = self.content
        else:
            data = self.content.encode()
        # The header records the size in BYTES; len() of a str would be wrong
        # for any text containing non-ASCII characters.
        header = 'blob ' + str(len(data)) + '\x00'
        return header.encode() + data

class NamedBlob(NamedObject, Blob):
    """A blob together with the file name it carries inside a tree."""

    # Mode written into the tree entry for this blob.
    mode = 0o100644

    def __init__(self, content, name):
        # NamedObject defines no __init__, so this resolves to Blob.__init__,
        # which stores `content`.
        super().__init__(content)
        self.name = name

In [11]:
class Tree(GitObject):
    """A tree object: an ordered collection of named entries.

    `content` is an iterable of objects exposing ``name`` and ``entry``
    (and normally ``mode``), e.g. NamedBlob or NamedTree instances.
    """

    def __init__(self, content):
        self.content = sorted(content, key=self._sort_key)

    @staticmethod
    def _sort_key(entry):
        """Return the byte string git uses to order `entry` within a tree."""
        key = entry.name.encode()
        # git compares directory names as if they ended in '/', so a
        # directory 'foo' sorts AFTER a file 'foo.txt' ('.' < '/').
        if (getattr(entry, 'mode', 0) & 0o170000) == 0o040000:
            key += b'/'
        return key

    @property
    def store(self):
        """Serialized tree: ``b'tree <size>\\x00'`` followed by all entries."""
        # Sort again here in case `content` was modified after construction.
        ordered = sorted(self.content, key=self._sort_key)
        content = b''.join(obj.entry for obj in ordered)
        header = 'tree ' + str(len(content)) + '\x00'
        return header.encode() + content

class NamedTree(NamedObject, Tree):
    """A tree together with the directory name it carries inside a parent tree."""

    # Mode written into the parent tree's entry for this directory.
    mode = 0o040000

    def __init__(self, content, name):
        # NamedObject defines no __init__, so this resolves to Tree.__init__,
        # which stores the sorted entries in self.content.
        super().__init__(content)
        self.name = name
     
    
class LazyNamedTree(NamedObject):
    """A directory entry identified only by its name and a known SHA-1.

    Unlike NamedTree it holds no child entries; `sha1` is supplied by the
    caller rather than computed.
    """

    mode = 0o040000

    def __init__(self, sha1, name):
        self.sha1, self.name = sha1, name

In [12]:
def store(obj, directory='.git'):
    """Write `obj` to the object store and return its SHA-1.

    `obj` must expose ``sha1`` (hex string) and ``store`` (serialized bytes).
    Nothing is written when a file for that SHA-1 already exists.
    """
    digest = obj.sha1
    object_path = sha1_to_path(digest, directory)
    # Given how unlikely it is that there has been a hash collision,
    # an existing file is assumed to be the same object written earlier.
    if not os.path.isfile(object_path):
        os.makedirs(sha1_to_directory(digest, directory), exist_ok=True)
        with open(object_path, mode='wb') as out:
            out.write(zlib.compress(obj.store))
    return digest

In [13]:
# Store the same content through the class-based API; the SHA-1 should be
# identical to the one store_blob('hello world') returned.
store(Blob('hello world'))

'95d09f2b10159347eece71399a7e2e907ea3df4f'

In [14]:
# Rebuild the two blobs listed in tree 770c7bf9... and check that hashing
# them inside a Tree reproduces that tree's SHA-1.
with open('README.md') as readme_file:  # close the handle instead of leaking it
    README = NamedBlob(readme_file.read(), 'README.md')
Duffer_ipynb = NamedBlob(pretty_print('997db51291f7490e756f7b3805d791a615e5d04f')[1], 'Duffer.ipynb')
Duffer_ipynb.sha1  # intermediate value; only the last expression is displayed
Tree([Duffer_ipynb, README]).sha1

'770c7bf919eed5444f4fae8df324536d77f2bdd4'