# Hexdump and Hexdiff

> Converting bytes to characters

In [None]:
#| default_exp hexdump

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import ctypes
from collections import UserList
from pretty_ctypes.hexint import hexint, asciiint, HexInt, NamedInt, ctypes_int_types, ctypes_signed_types
from pretty_ctypes.utils import colored, batched

In [None]:
#| export

def hexdump_line(chunk, bits, ascii=None, width=128, highlight=None):
    """Render one row of a hexdump: grouped hex cells plus an optional ASCII column.

    Args:
        chunk: Values shown on this row (any iterable; it is materialised once).
        bits: Size of each value in bits; forwarded to ``hexint`` and ``asciiint``.
        ascii: Whether to append the ``|...|`` column. Defaults to True only
            when ``bits == 8``.
        width: Row width in bits. Rows of 128 bits or more are split into
            64-bit groups separated by two spaces.
        highlight: Per-element color spec handed to ``colored``. May be shorter
            than ``chunk`` (missing entries become ``None``) and may be any
            sequence, not just a list.

    Returns:
        The row as a string. Short rows are space-padded so that their ASCII
        column and their right edge line up with a full row.
    """
    if ascii is None:
        ascii = bits == 8

    chunk = list(chunk)
    count = len(chunk)
    # Copy into a list: callers may pass a tuple, and tuple + list raises TypeError.
    highlight = list(highlight) if highlight is not None else []
    highlight += [None] * (count - len(highlight))

    per_line = max(width // bits, 1)
    # Group the output into 64-bit groups if it's long:
    # 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F
    sub_width = 64 if width >= 128 else width
    # Elements wider than a group (bits > 64) get one element per group
    # instead of a zero batch size.
    per_group = max(sub_width // bits, 1)

    cells = list(zip(chunk, highlight))
    res = "  ".join(" ".join(colored(hexint(d, bits), hl) for d, hl in sub)
                    for sub in batched(cells, per_group))

    # Assumes hexint renders bits // 4 characters per value, as the demo output
    # for 8/16/32 bits shows -- TODO confirm for sizes that are not a multiple of 4.
    hex_digits = bits // 4

    def visible_len(n):
        """Printable width of the hex area holding ``n`` cells (escape codes excluded)."""
        if n <= 0:
            return 0
        groups = -(-n // per_group)  # ceil division
        # One space between neighbours, plus one extra space per group boundary.
        return n * hex_digits + (n - 1) + (groups - 1)

    # Pad up to the width of a full row so the ASCII column stays aligned,
    # whichever group the row happens to end in.
    res += " " * max(visible_len(per_line) - visible_len(count), 0)

    if ascii:
        glyphs = [asciiint(d, bits) for d in chunk]
        # Measure the rendered cell instead of assuming one character per
        # element: wider elements produce several characters each.
        glyph_width = len(str(glyphs[0])) if glyphs else max(bits // 8, 1)
        body = "".join(colored(g, hl) for g, hl in zip(glyphs, highlight))
        res += f"  |{body}|" + " " * (glyph_width * max(per_line - count, 0))
    return res

In [None]:
print(hexdump_line(range(16), 8))

00 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  |................|


In [None]:
print(hexdump_line(range(0x12345678, 0x12345678+4), 32, ascii=True))

12345678 12345679  1234567A 1234567B  |.4Vx.4Vy.4Vz.4V{|


In [None]:
print(hexdump_line(range(0x12345678, 0x12345678+4), 32, ascii=True, highlight=["red", "green", "BLACK", (None, "blue")]))

[31m12345678[0m [32m12345679[0m  [90m1234567A[0m [44m1234567B[0m  |[31m.4Vx[0m[32m.4Vy[0m[90m.4Vz[0m[44m.4V{[0m|


In [None]:
print(hexdump_line(range(32), 8, ascii=True, width=128))

00 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F  |................................|


In [None]:
#| export

def hexdump(data, bits, highlight=None, ascii=None, width=128):
    """Format ``data`` as a multi-line hexdump with a leading address column.

    Args:
        data: Values to dump (any iterable; it is materialised once).
        bits: Size of each value in bits.
        highlight: Optional per-element color specs for ``colored``. May be
            shorter than ``data`` (the rest is left unhighlighted) and may be
            any sequence, not just a list.
        ascii: Forwarded to ``hexdump_line``; ``None`` lets it pick its default.
        width: Row width in bits; raised to ``bits`` if smaller, so every row
            holds at least one element.

    Returns:
        One text row per ``width // bits`` elements, joined with newlines. Each
        row is prefixed by its byte offset in upper-case hex, zero-padded to
        the width of the largest offset. Empty input yields an empty string.
    """
    width = max(width, bits)
    per_line = width // bits

    data = list(data)
    # Copy into a list: a tuple here would make the padding below raise TypeError.
    highlight = list(highlight) if highlight is not None else []
    highlight += [None] * (len(data) - len(highlight))

    lines = [
        hexdump_line(d, bits=bits, ascii=ascii, width=width, highlight=list(h))
        for d, h in zip(batched(data, per_line), batched(highlight, per_line))
    ]
    if not lines:
        return ""

    # Size the address column from the same formula that produces the
    # addresses, so the two cannot disagree for widths that are not a
    # multiple of 8 bits.
    offsets = [(i * width) // 8 for i in range(len(lines))]
    addr_digits = len(f"{offsets[-1]:X}")
    return "\n".join(f"{off:0{addr_digits}X}: {line}" for off, line in zip(offsets, lines))

In [None]:
print(hexdump(list(range(64)), 8, highlight=["BLACK", None, "red", "RED"]))

00: [90m00[0m 01 [31m02[0m [91m03[0m 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  |[90m.[0m.[31m.[0m[91m.[0m............|
10: 10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F  |................|
20: 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  | !"#$%&'()*+,-./|
30: 30 31 32 33 34 35 36 37  38 39 3A 3B 3C 3D 3E 3F  |0123456789:;<=>?|


In [None]:
print(hexdump(range(48, 64), 32, ascii=True, highlight=["BLACK", None, "red", "RED"]*2))

00: [90m00000030[0m 00000031  [31m00000032[0m [91m00000033[0m  |[90m...0[0m...1[31m...2[0m[91m...3[0m|
10: [90m00000034[0m 00000035  [31m00000036[0m [91m00000037[0m  |[90m...4[0m...5[31m...6[0m[91m...7[0m|
20: 00000038 00000039  0000003A 0000003B  |...8...9...:...;|
30: 0000003C 0000003D  0000003E 0000003F  |...<...=...>...?|


In [None]:
data = list(range(8)) * 8
print(hexdump(data, bits=8, highlight=[ "BLACK" if d==0 else None for d in data] ))

00: [90m00[0m 01 02 03 04 05 06 07  [90m00[0m 01 02 03 04 05 06 07  |[90m.[0m.......[90m.[0m.......|
10: [90m00[0m 01 02 03 04 05 06 07  [90m00[0m 01 02 03 04 05 06 07  |[90m.[0m.......[90m.[0m.......|
20: [90m00[0m 01 02 03 04 05 06 07  [90m00[0m 01 02 03 04 05 06 07  |[90m.[0m.......[90m.[0m.......|
30: [90m00[0m 01 02 03 04 05 06 07  [90m00[0m 01 02 03 04 05 06 07  |[90m.[0m.......[90m.[0m.......|


In [None]:
#| export

def hexdiff(data1, data2, bits, ascii=None, width=128):
    """
    Compare two arrays side by side in hexdump format, highlighting differences.

    Colors used per element position:
        CYAN:   the two arrays hold different values
        yellow: only one of the arrays has an element at this position
        BLACK:  both arrays hold the same value and it is zero

    Rows whose contents differ are joined by a CYAN ``=>``; identical rows are
    joined by blanks.

    Args:
        data1: First array to compare (must support ``len`` and slicing)
        data2: Second array to compare (must support ``len`` and slicing)
        bits: Number of bits per element (8, 16, 32, etc.)
        ascii: Whether to show ASCII representation (default: True for 8-bit, False otherwise)
        width: Width of each line in bits (default: 128)

    Returns:
        String with the side-by-side hexdump comparison
    """
    width = max(width, bits)
    if ascii is None:
        ascii = bits == 8

    elems_per_line = width // bits
    bytes_per_line = width // 8

    lines_data1 = (len(data1) + elems_per_line - 1) // elems_per_line
    lines_data2 = (len(data2) + elems_per_line - 1) // elems_per_line
    total_lines = max(lines_data1, lines_data2)

    addr_digits = len(f"{(total_lines - 1) * bytes_per_line:X}")

    # When one input runs out of rows early, its side is filled with blanks of
    # a full row's width so the other column does not shift.
    # Assumes unhighlighted cells carry no escape codes, as the plain demo
    # output suggests -- TODO confirm against `colored`.
    blank = ""
    if lines_data1 != lines_data2:
        blank = " " * len(hexdump_line([0] * elems_per_line, bits, ascii=ascii, width=width))

    result = []
    for i in range(total_lines):
        start = i * elems_per_line
        # Normalise to lists so equal content compares equal even when the
        # inputs are different sequence types (tuple vs list, range vs list).
        chunk1 = list(data1[start:start + elems_per_line])
        chunk2 = list(data2[start:start + elems_per_line])

        highlight = [
            "CYAN" if a != b else ("BLACK" if a == 0 else None)
            for a, b in zip(chunk1, chunk2)
        ]
        # Positions present on one side only.
        highlight += ["yellow"] * (max(len(chunk1), len(chunk2)) - len(highlight))

        line1 = hexdump_line(chunk1, bits, ascii=ascii, width=width, highlight=highlight) if chunk1 else blank
        line2 = hexdump_line(chunk2, bits, ascii=ascii, width=width, highlight=highlight) if chunk2 else blank

        sep = colored("=>", "CYAN") if chunk1 != chunk2 else "  "
        addr = f"{i * bytes_per_line:0{addr_digits}X}"

        result.append(f"{addr}: {line1} {sep} {line2}")

    return "\n".join(result)

In [None]:
data1 = list(range(32)) + [32, 33, 34, 35] + list(range(40, 64))
data2 = list(range(32)) + [1, 2, 3, 4]     + list(range(40, 60)) + [1,2,3,4,64, 65]

print(hexdiff(data1, data2, 8))

00: [90m00[0m 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  |[90m.[0m...............|    [90m00[0m 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  |[90m.[0m...............|
10: 10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F  |................|    10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F  |................|
20: [96m20[0m [96m21[0m [96m22[0m [96m23[0m 28 29 2A 2B  2C 2D 2E 2F 30 31 32 33  |[96m [0m[96m![0m[96m"[0m[96m#[0m()*+,-./0123| [96m=>[0m [96m01[0m [96m02[0m [96m03[0m [96m04[0m 28 29 2A 2B  2C 2D 2E 2F 30 31 32 33  |[96m.[0m[96m.[0m[96m.[0m[96m.[0m()*+,-./0123|
30: 34 35 36 37 38 39 3A 3B  [96m3C[0m [96m3D[0m [96m3E[0m [96m3F[0m              |456789:;[96m<[0m[96m=[0m[96m>[0m[96m?[0m|     [96m=>[0m 34 35 36 37 38 39 3A 3B  [96m01[0m [96m02[0m [96m03[0m [96m04[0m [33m40[0m [33m41[0m        |456789:;[96m.[0m[96m.[0m[96m.[0m[96m.[0m[33m@[0m[33mA[0m|  


In [None]:
#| export

class HexIntArray():
    """A list of fixed-width integers that can be hexdumped and diffed.

    Attributes are plain: ``data`` is a list copy of the input values, ``bits``
    is the element size in bits and ``signed`` records the signedness flag
    given at construction.
    """

    def __init__(self, data=None, bits=32, signed=False):
        """Store a list copy of ``data`` (empty if None) together with ``bits`` and ``signed``."""
        assert bits >= 4, f"data.bits must be at least 4, got {bits}"
        self.bits, self.signed = bits, signed
        self.data = list(data) if data is not None else []

    def __len__(self): return len(self.data)
    def __getitem__(self, key): return self.data[key]
    def __iter__(self): return iter(self.data)

    @classmethod
    def from_ctypes(cls, ct):
        """Build a HexIntArray from a non-empty ctypes array of an integer type.

        The element size is taken from ``ctypes.sizeof`` of the element type
        and signedness from membership in ``ctypes_signed_types``.
        """
        assert isinstance(ct, ctypes.Array), f"Expected ct to be ctypes.Array, got {type(ct)}"
        assert ct._type_ in ctypes_int_types, f"Expected ct._type to be in {ctypes_int_types}, got {ct._type_}"
        assert len(ct), "Expected ct to be non-empty"
        # NOTE(review): bits=1 for c_bool cannot pass the `bits >= 4` assert in
        # __init__, so a c_bool array would be rejected there -- confirm whether
        # c_bool is meant to be supported.
        bits = 1 if ct._type_ == ctypes.c_bool else ctypes.sizeof(ct._type_)*8
        return cls(ct, bits, ct._type_ in ctypes_signed_types)

    def hexdump(self, ascii=None, width=128):
        """Return the hexdump of this array with zero elements shown in BLACK.

        Delegates to the module-level ``hexdump`` so both share one layout
        implementation.
        """
        hl = ["BLACK" if d == 0 else None for d in self.data]
        # Inside a method body the bare name resolves to the module-level
        # function, not to this method.
        return hexdump(self.data, self.bits, highlight=hl, ascii=ascii, width=width)

    def diff(self, other, ascii=None, width=128):
        """Return a side-by-side ``hexdiff`` against ``other``, which must have the same ``bits``."""
        assert self.bits == other.bits, f"Expected self.bits to be equal to other.bits, got {self.bits} and {other.bits}"
        return hexdiff(self.data, other.data, bits=self.bits, ascii=ascii, width=width)

In [None]:
a = HexIntArray([0x12345678, 0x9ABCDEF0, 0, 1], bits=32, signed=False)
print(a.hexdump(ascii=True))

0: 12345678 9ABCDEF0  [90m00000000[0m 00000001  |.4Vx....[90m....[0m....|


In [None]:
a = (ctypes.c_uint16 * 64)(*range(64))
print(HexIntArray.from_ctypes(a).hexdump(ascii=True))

00: [90m0000[0m 0001 0002 0003  0004 0005 0006 0007  |[90m..[0m..............|
10: 0008 0009 000A 000B  000C 000D 000E 000F  |................|
20: 0010 0011 0012 0013  0014 0015 0016 0017  |................|
30: 0018 0019 001A 001B  001C 001D 001E 001F  |................|
40: 0020 0021 0022 0023  0024 0025 0026 0027  |. .!.".#.$.%.&.'|
50: 0028 0029 002A 002B  002C 002D 002E 002F  |.(.).*.+.,.-.../|
60: 0030 0031 0032 0033  0034 0035 0036 0037  |.0.1.2.3.4.5.6.7|
70: 0038 0039 003A 003B  003C 003D 003E 003F  |.8.9.:.;.<.=.>.?|


In [None]:
a = (ctypes.c_uint8 * 64)(*range(32, 96))
print(ah:=HexIntArray.from_ctypes(a).hexdump())

00: 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  | !"#$%&'()*+,-./|
10: 30 31 32 33 34 35 36 37  38 39 3A 3B 3C 3D 3E 3F  |0123456789:;<=>?|
20: 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F  |@ABCDEFGHIJKLMNO|
30: 50 51 52 53 54 55 56 57  58 59 5A 5B 5C 5D 5E 5F  |PQRSTUVWXYZ[\]^_|


In [None]:
b = (ctypes.c_uint8 * 60).from_buffer_copy(a)
b[17:24] = (ctypes.c_uint8 * 7)(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
print(bh:=HexIntArray.from_ctypes(b).hexdump())

00: 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  | !"#$%&'()*+,-./|
10: 30 11 22 33 44 55 66 77  38 39 3A 3B 3C 3D 3E 3F  |0."3DUfw89:;<=>?|
20: 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F  |@ABCDEFGHIJKLMNO|
30: 50 51 52 53 54 55 56 57  58 59 5A 5B              |PQRSTUVWXYZ[|    


In [None]:
ah = HexIntArray.from_ctypes(a)
bh = HexIntArray.from_ctypes(b)
print(ah.diff(bh))


00: 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  | !"#$%&'()*+,-./|    20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  | !"#$%&'()*+,-./|
10: 30 [96m31[0m [96m32[0m 33 [96m34[0m [96m35[0m [96m36[0m [96m37[0m  38 39 3A 3B 3C 3D 3E 3F  |0[96m1[0m[96m2[0m3[96m4[0m[96m5[0m[96m6[0m[96m7[0m89:;<=>?| [96m=>[0m 30 [96m11[0m [96m22[0m 33 [96m44[0m [96m55[0m [96m66[0m [96m77[0m  38 39 3A 3B 3C 3D 3E 3F  |0[96m.[0m[96m"[0m3[96mD[0m[96mU[0m[96mf[0m[96mw[0m89:;<=>?|
20: 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F  |@ABCDEFGHIJKLMNO|    40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F  |@ABCDEFGHIJKLMNO|
30: 50 51 52 53 54 55 56 57  58 59 5A 5B [33m5C[0m [33m5D[0m [33m5E[0m [33m5F[0m  |PQRSTUVWXYZ[[33m\[0m[33m][0m[33m^[0m[33m_[0m| [96m=>[0m 50 51 52 53 54 55 56 57  58 59 5A 5B              |PQRSTUVWXYZ[|    


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()