In [1]:
{-# LANGUAGE OverloadedStrings #-}

-- :!if [ -d solarized/ ]; then rm -rf solarized; fi
-- :!git clone https://github.com/altercation/solarized
:!cd solarized
:!ls .git/objects/pack

pack-44abe4e456180e053619e0422cb92be7fe650af5.idx
pack-44abe4e456180e053619e0422cb92be7fe650af5.pack

In [2]:
import Git.Loose (Ref, parsed, parseBinRef)

In [3]:
import System.Directory (getDirectoryContents)
import System.FilePath  ((</>), takeExtension)

getPackIndices :: FilePath -> IO [FilePath]
getPackIndices path = do
    let packPath = path </> "objects" </> "pack"
    files <- getDirectoryContents packPath
    let indices = filter (\f -> takeExtension f == ".idx") files
    return $ map (packPath </>) indices

getPackIndices ".git"

[".git/objects/pack/pack-44abe4e456180e053619e0422cb92be7fe650af5.idx"]

In [4]:
import           Data.ByteString      (ByteString)
import qualified Data.ByteString as B

packIndex <- B.readFile . head =<< getPackIndices ".git"

In [5]:
import           Data.Attoparsec.ByteString            (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.Attoparsec.ByteString       as A (word8)
import           Data.Word                             (Word8)

parsePackIndexHeader :: Parser [Word8]
parsePackIndexHeader = do
    traverse A.word8 start
    traverse A.word8 version
    where
        start   = [255, 116, 79, 99]
        version = [0, 0, 0, 2]

parsed parsePackIndexHeader packIndex

[0,0,0,2]

In [6]:
import Control.Monad (replicateM)
import Data.Bits

fromBytes :: (Bits a, Integral a) => ByteString -> a
fromBytes = B.foldl' (\l r -> (l `shiftL` 8) + fromIntegral r) 0

parseWord32 :: Integral a => Parser a
parseWord32 = fromIntegral . fromBytes <$> A.take 4

parseTotals :: Parser [Int]
parseTotals = replicateM 256 parseWord32

cumulativeTotals = parsed (parsePackIndexHeader *> parseTotals) packIndex
cumulativeTotals

[10,16,22,37,45,55,62,66,72,81,88,95,105,115,125,131,141,146,153,161,172,184,194,201,208,223,230,234,242,249,267,280,290,297,303,308,313,321,331,340,355,364,376,383,387,398,409,421,432,444,456,465,475,483,492,497,508,512,520,523,532,540,549,569,577,581,586,601,606,615,622,631,638,651,662,676,685,694,703,715,727,733,743,749,754,763,772,780,786,794,802,812,820,827,837,848,853,860,866,876,887,900,908,917,928,932,934,943,949,957,968,983,994,1008,1017,1025,1038,1044,1052,1063,1076,1082,1087,1096,1104,1115,1120,1127,1140,1148,1157,1165,1172,1181,1191,1198,1206,1211,1221,1232,1235,1245,1253,1264,1269,1284,1288,1298,1304,1309,1314,1322,1328,1338,1344,1350,1359,1367,1374,1384,1391,1403,1411,1419,1430,1438,1449,1462,1467,1477,1481,1498,1507,1516,1526,1539,1549,1555,1559,1566,1574,1580,1590,1599,1607,1614,1618,1624,1632,1638,1648,1657,1661,1666,1672,1676,1684,1689,1701,1714,1721,1731,1741,1749,1757,1761,1771,1779,1789,1797,1808,1814,1817,1821,1827,1838,1852,1862,1869,1879,1884,1895,1904,1908,1919

In [7]:
total = last cumulativeTotals

parseCRC32s :: Int -> Parser [Int]
parseCRC32s n = replicateM n parseWord32

crc32s = parsed (parsePackIndexHeader *> parseTotals *> parseCRC32s total) packIndex

length crc32s

2186

In [8]:
parseBinRefs :: Int -> Parser [Ref]
parseBinRefs n = replicateM n parseBinRef

refs = parsed (parsePackIndexHeader *> parseTotals *> parseCRC32s total *> parseBinRefs total) packIndex

head refs

ecaf0f0c6ebf660a697ace32d1bed1b431a1426a

In [9]:
parseOffsets :: Int -> Parser [Int]
parseOffsets n = replicateM n parseWord32

offsets = parsed (parsePackIndexHeader *> parseTotals *> parseCRC32s total *> parseBinRefs total *> parseOffsets total) packIndex

head offsets

4322927

In [10]:
getFifthOffsets :: ByteString -> [Int]
getFifthOffsets bs
    | B.null bs = []
    | otherwise = let (h, t) = B.splitAt 8 bs in fromIntegral (fromBytes h) : getFifthOffsets t
    
computeOffset :: [Int] -> Int -> Int
computeOffset fifth offset = if offset < bit 31 then offset else fifth !! (offset - bit 31)

remainder = parsed (parsePackIndexHeader *> parseTotals *> parseCRC32s total *> parseBinRefs total *> parseOffsets total *> A.takeByteString) packIndex

getFifthOffsets $ B.take (B.length remainder - 40) remainder

parsed parseBinRef remainder
parsed (parseBinRef *> parseBinRef) remainder

[]

44abe4e456180e053619e0422cb92be7fe650af5

2295b2642e03cfc7557ecbddc4e757666d48a387

In [11]:
data PackIndexEntry = PackIndexEntry Int Ref Int deriving (Eq, Show)

parsePackIndex :: Parser [PackIndexEntry]
parsePackIndex = do
    parsePackIndexHeader
    totals    <- parseTotals
    let total = last totals
    crc32s    <- parseCRC32s total
    refs      <- parseBinRefs total
    offsets'  <- parseOffsets total
    remainder <- A.takeByteString
    let (fifth, checksums) = B.splitAt (B.length remainder - 40) remainder
    let fifthOffsets = getFifthOffsets fifth
    let offsets = map (computeOffset fifthOffsets) offsets'
    return $ zipWith3 PackIndexEntry crc32s refs offsets

packIndexEntries = parsed parsePackIndex packIndex

head packIndexEntries

PackIndexEntry 315724 "ecaf0f0c6ebf660a697ace32d1bed1b431a1426a" 4322927