In [3]:
import zlib
import hashlib
import os
from binascii import hexlify, unhexlify

In [41]:
def sha1_to_path(sha1, directory):
    """Return the path of the object file for ``sha1`` inside ``directory``.

    The result has the form ``<directory>/objects/<first two characters>/<remaining characters>``.
    """
    fan_out, remainder = sha1[:2], sha1[2:]
    return '/'.join((directory, 'objects', fan_out, remainder))
def sha1_to_directory(sha1, directory):
    """Return the folder that holds the object file for ``sha1``.

    The result has the form ``<directory>/objects/<first two characters of sha1>``.
    """
    return '/'.join((directory, 'objects', sha1[:2]))
def disambiguate_sha1(sha1, directory):
    """Expand an abbreviated SHA-1 to the full name of the single stored object it matches.

    The first two characters of ``sha1`` select the sub-directory of
    ``<directory>/objects`` to search; the remaining characters are matched
    as a prefix against the file names found there.

    Args:
        sha1: Abbreviated (or full) hexadecimal object name.
        directory: Directory that contains the ``objects`` folder.

    Returns:
        The two-character prefix followed by the one matching file name.

    Raises:
        Exception: If no stored object matches, or if more than one does.
    """
    prefix = sha1[:2]
    suffix = sha1[2:]
    try:
        candidates = os.listdir(sha1_to_directory(sha1, directory))
    except FileNotFoundError:
        # A missing sub-directory simply means nothing is stored under this
        # prefix; report it with the same error as any other failed lookup.
        candidates = []
    matching_files = [name for name in candidates if name.startswith(suffix)]
    if not matching_files:
        raise Exception('No object exists with that SHA1.')
    if len(matching_files) > 1:
        raise Exception('Ambiguous SHA1 provided.')
    return prefix + matching_files[0]

In [38]:
def _parse_tree_entries(body):
    """Convert a tree payload into alternating 'mode name' and hex SHA-1 strings."""
    sha1_size = 20
    fields = []
    cursor = 0
    while cursor < len(body):
        # Each entry is b'<mode> <name>\x00' followed by exactly 20 raw SHA-1
        # bytes. The raw bytes may themselves contain \x00, so they have to be
        # consumed by length rather than by splitting the payload on \x00.
        terminator = body.find(b'\x00', cursor)
        sha1_end = terminator + 1 + sha1_size
        if terminator == -1 or sha1_end > len(body):
            raise Exception('Malformed tree object.')
        fields.append(body[cursor:terminator].decode())
        fields.append(hexlify(body[terminator + 1:sha1_end]).decode())
        cursor = sha1_end
    return fields


def pretty_print(sha1, directory='.git'):
    """Read a stored object and return its contents as a list of strings.

    Args:
        sha1: Full 40-character object name, or a shorter abbreviation that
            is resolved with ``disambiguate_sha1``.
        directory: Directory that contains the ``objects`` folder.

    Returns:
        For a blob: ``[header, *pieces]`` where ``pieces`` is the payload
        split on NUL bytes and decoded.
        For a tree: ``[header, 'mode name', hex_sha1, 'mode name', hex_sha1, ...]``
        with one pair per entry (an empty tree yields just ``[header]``).
        For any other object type: ``None``.

    Raises:
        Exception: If the abbreviation cannot be resolved, or if the object
            data has no header terminator or contains a truncated tree entry.
    """
    if len(sha1) < 40:
        sha1 = disambiguate_sha1(sha1, directory)
    path = sha1_to_path(sha1, directory)
    with open(path, mode='rb') as compressed:
        decompressed = zlib.decompress(compressed.read())
    # Only the first NUL separates the header from the payload; later NUL
    # bytes belong to the payload itself.
    header, separator, body = decompressed.partition(b'\x00')
    if not separator:
        raise Exception('Malformed object: header terminator not found.')
    object_type = header.split(b' ')[0]
    if object_type == b'blob':
        return [part.decode() for part in [header, *body.split(b'\x00')]]
    if object_type == b'tree':
        return [header.decode(), *_parse_tree_entries(body)]
    # Other object types are not rendered.
    return None
# Example: render the object named by a full 40-character SHA-1, read from the default '.git' directory.
pretty_print('8accee2c256b491ea0d9c138d0bbc94d064ba590')

['tree 37', '100644 README.md', 'ad0ec6bc18160c7aa60597510e9068a9db8e02b8']

[b'tree 77', b'100644 Duffer.ipynb', b'\xde\xf1\xc9C\xe2\x07\xdf\x97\xdb\x1b\xe4\xb5\x9d\x02K\x92\xda\xaf\xd0\xe2100644 README.md', b'\xad\x0e\xc6\xbc\x18\x16\x0cz\xa6\x05\x97Q\x0e\x90h\xa9\xdb\x8e\x02\xb8']


[b'tree 77',
 b'100644 Duffer.ipynb',
 b'def1c943e207df97db1be4b59d024b92daafd0e231303036343420524541444d452e6d64',
 b'',
 b'ad0ec6bc18160c7aa60597510e9068a9db8e02b8']

In [7]:
def store_blob(content, directory='.git'):
    """Write ``content`` as a zlib-compressed blob object and return its SHA-1.

    The stored data is ``b'blob <size>\\x00'`` followed by the content, where
    ``<size>`` is the length of the content in bytes. The file is written to
    the path given by ``sha1_to_path`` unless a file already exists there.

    Args:
        content: Text (encoded as UTF-8) or a bytes-like object to store.
        directory: Directory that contains the ``objects`` folder.

    Returns:
        The hexadecimal SHA-1 digest of the header plus content.
    """
    payload = content.encode('utf8') if isinstance(content, str) else content
    # The size in the header must count encoded bytes, not characters,
    # otherwise non-ASCII text produces a wrong header and a wrong SHA-1.
    store = b'blob ' + str(len(payload)).encode('ascii') + b'\x00' + payload
    sha1 = hashlib.sha1(store).hexdigest()
    path = sha1_to_path(sha1, directory)
    # Given how unlikely it is that there has been a hash collision,
    # assume an existing file means the same blob is being written again.
    if not os.path.isfile(path):
        os.makedirs(sha1_to_directory(sha1, directory), exist_ok=True)
        with open(path, mode='wb') as file:
            file.write(zlib.compress(store))
    return sha1

In [34]:
# Example: store the text 'hello world' as a blob under the default '.git' directory; the cell shows the returned SHA-1.
store_blob('hello world')

'95d09f2b10159347eece71399a7e2e907ea3df4f'

In [39]:
# Example: look an object up by abbreviation; names shorter than 40 characters are resolved via disambiguate_sha1.
pretty_print('95')

d09f2b10159347eece71399a7e2e907ea3df4f


['blob 11', 'hello world']