## An Improper Visualization of Data

This is a silly demonstration of how not to visualize data. In the code below, the entire text of the ERV Bible is displayed as one bitmap image. The text is converted to hexadecimal, split into three-byte chunks, and saved as an image. Each pixel represents three consecutive characters from the text, with their byte values used as the brightness of the Red, Green, and Blue channels respectively.

The moral of the story: Data visualization is more than just "making a picture." Consider your audience and aggregate your data appropriately for useful consumption.

In [16]:
# Importing libraries
import math
import os
import csv
import requests
import numpy as np
from PIL import Image

# Data Source (English Revised Version of the Bible)
URL = "https://openbible.com/textfiles/erv.txt"

# CSV exported file name (one hex-encoded three-byte group per cell)
csv_path = "erv_encoded.csv"

# BMP exported file name (one RGB pixel per three-byte group)
bmp_path = "erv_image.bmp"

# Aspect ratio (width : height) shared by the CSV grid and the bitmap;
# the final dimensions are an integer multiple of these two values
ratio_w, ratio_h = 16, 9

# Separator placed between the three two-digit hex strings inside one CSV cell;
# with the empty string a cell looks like "496E20"
JOIN_WITHIN_CELL = ""

# Downloading data (the request uses a 60-second timeout)
resp = requests.get(URL, timeout=60)
# Fail loudly on an HTTP error status rather than encoding an error page
resp.raise_for_status()
# Keep the raw response body as bytes so each byte maps to exactly one hex pair
data = resp.content

# Converting every downloaded byte to a two-digit uppercase hex string
hex_bytes = [format(byte, "02X") for byte in data]

# Grouping hex into three byte chunks (one chunk per CSV cell / pixel)
groups = [hex_bytes[start:start + 3] for start in range(0, len(hex_bytes), 3)]

# Only the final chunk can come up short; fill it with "00" to three entries
if groups and len(groups[-1]) < 3:
    groups[-1].extend(["00"] * (3 - len(groups[-1])))

# Counting number of three byte chunks
n_groups = len(groups)

# Scaling to 16x9 proportions: pick the smallest integer factor k such that
# a (ratio_w * k) x (ratio_h * k) grid has room for every chunk
k = math.ceil(math.sqrt(n_groups / (ratio_w * ratio_h)))
width = ratio_w * k
height = ratio_h * k
total_cells = width * height

# Padding end with all-zero chunks so the grid is completely filled
# (adds nothing when the chunks already fill it exactly)
groups.extend(["00", "00", "00"] for _ in range(total_cells - n_groups))

# Writing csv file: `height` rows of `width` cells, taking chunks in order;
# each cell is one chunk's hex strings joined with JOIN_WITHIN_CELL
with open(csv_path, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(
        [JOIN_WITHIN_CELL.join(cell) for cell in groups[row * width:(row + 1) * width]]
        for row in range(height)
    )

# Writing bitmap file
# Every chunk holds three hex strings; parsed in order they are the R, G and B
# bytes of one pixel, and pixels fill the image row by row.
channel_values = np.fromiter(
    (int(hex_pair, 16) for group in groups for hex_pair in group),
    dtype=np.uint8,
)
arr = channel_values.reshape(height, width, 3)

Image.fromarray(arr, mode="RGB").save(bmp_path, format="BMP")