Main notebook :3

In [10]:
import collections

from PIL import Image
import numpy as np

def get_image(filepath):
  image = Image.open(filepath)
  data = np.asarray(image)
  # filters out red color, and turns into binary array
  if len(data.shape) == 3:
    return (data[:,:,0] > 150).astype(np.uint8)
  else:
    return (data > 150).astype(np.uint8)

def save_image(grid, filepath):
  img = Image.fromarray(grid * 255, mode='L')
  img.save(filepath)
  print(f'saved to {filepath}')

def show_image(grid):
  # Given a binary grid, show the image
  grid = grid * 255
  # grid = 255 - grid
  out = Image.fromarray(np.uint8(grid))
  out.show()

def remove_small_islands(orig_grid, max_size=10):
  grid = orig_grid.copy()  # avoid modifying original
  rows, cols = grid.shape
  visited = np.zeros_like(grid, dtype=bool)

  directions = [(1,0), (-1,0), (0,1), (0,-1)]

  for r in range(rows):
    for c in range(cols):
      if grid[r, c] == 0 and not visited[r, c]:
        # Start BFS
        queue = collections.deque([(r, c)])
        visited[r, c] = True
        coords = [(r, c)]

        while queue:
          cr, cc = queue.popleft()
          for dr, dc in directions:
            nr, nc = cr + dr, cc + dc
            if (
              0 <= nr < rows and 0 <= nc < cols and
              grid[nr, nc] == 0 and not visited[nr, nc]
            ):
              visited[nr, nc] = True
              queue.append((nr, nc))
              coords.append((nr, nc))

        # After BFS, check island size
        if len(coords) <= max_size:
          for (rr, cc) in coords:
            grid[rr, cc] = 1

  return grid

def get_corners(grid, start_i, start_j):
  """Returns the coordinates of the corners of the page (tl, tr, br, bl)."""
  rows, cols = grid.shape
  queue = collections.deque()
  queue.append((start_i, start_j))

  visited = np.zeros([rows, cols])

  tl, tr, br, bl = (start_i, start_j), (start_i, start_j), (start_i, start_j), (start_i, start_j)

  while queue:
    i, j = queue.popleft()
    if i < 0 or i >= rows or j < 0 or j >= cols:
      continue
    if visited[i][j]:
      continue
    if grid[i][j]:
      continue
    visited[i][j] = 1

    if -i - j > -tl[0] - tl[1]:
      tl = i, j
    if -i + j > -tr[0] + tr[1]:
      tr = i, j
    if i - j > bl[0] - bl[1]:
      bl = i, j
    if i + j > br[0] + br[1]:
      br = i, j

    queue.append((i + 1, j))
    queue.append((i - 1, j))
    queue.append((i, j + 1))
    queue.append((i, j - 1))

  return tl, tr, br, bl

def normalize_page(grid, page_corners, width=1300, height=1950):
  tl, tr, br, bl = page_corners
  tlp = [tl[1], tl[0]]
  trp = [tr[1], tr[0]]
  brp = [br[1], br[0]]
  blp = [bl[1], bl[0]]
  src_pts = np.float32([tlp, trp, brp, blp])
  dst_pts = np.float32([
    [0, 0], # top-left
    [width-1, 0], # top-right
    [width-1, height-1], # bottom-right
    [0, height-1], # bottom-left
  ])

  M = cv2.getPerspectiveTransform(src_pts, dst_pts)
  return cv2.warpPerspective(grid, M, (width, height))

In [None]:
PATH_TEMPLATE = "book1/family-tree-{}.png"
idx = -1
for i in range(4, 13):
    print(f"i: {i}")
    filepath = PATH_TEMPLATE.format(f"{i:02d}")
    grid = get_image(filepath)
    grid = remove_small_islands(grid)
    rows, cols = grid.shape
    # Find starting pixel of page border
    i = int(rows / 2)
    j = cols - 20
    while grid[i][j] or grid[i][j - 1] or grid[i][j - 2] or grid[i][j - 3]:
        j -= 1
    right_page_corners = get_corners(grid, i, j)
    print("right_page_corners", right_page_corners)
    right_page = normalize_page(grid, right_page_corners)
    save_image(right_page, f"book1/pages/{idx}.png")
    idx += 1

    i = int(rows / 2)
    j = 0
    while grid[i][j] or grid[i][j + 1] or grid[i][j + 2] or grid[i][j + 3]:
        j += 1
    left_page_corners = get_corners(grid, i, j)
    print("left_page_corners", left_page_corners)
    left_page = normalize_page(grid, left_page_corners)
    save_image(left_page, f"book1/pages/{idx}.png")
    idx += 1


i: 4
right_page_corners ((127, 1909), (110, 3182), (2043, 3199), (2059, 1927))
saved to book1/pages/-1.png
left_page_corners ((133, 425), (147, 1705), (2089, 1682), (2079, 402))
saved to book1/pages/0.png
i: 5


  img = Image.fromarray(grid * 255, mode='L')


right_page_corners ((131, 1915), (110, 3190), (2039, 3213), (2060, 1939))
saved to book1/pages/1.png
left_page_corners ((128, 441), (141, 1721), (2080, 1697), (2069, 416))
saved to book1/pages/2.png
i: 6
right_page_corners ((128, 1897), (107, 3170), (2046, 3194), (2065, 1922))
saved to book1/pages/3.png
left_page_corners ((126, 427), (137, 1707), (2073, 1688), (2064, 405))
saved to book1/pages/4.png
i: 7
right_page_corners ((124, 1903), (103, 3175), (2041, 3199), (2060, 1926))
saved to book1/pages/5.png
left_page_corners ((127, 436), (138, 1716), (2072, 1696), (2062, 414))
saved to book1/pages/6.png
i: 8
right_page_corners ((132, 1890), (112, 3163), (2047, 3185), (2065, 1912))
saved to book1/pages/7.png
left_page_corners ((128, 423), (142, 1704), (2081, 1682), (2070, 401))
saved to book1/pages/8.png
i: 9
right_page_corners ((126, 1898), (105, 3173), (2036, 3194), (2054, 1919))
saved to book1/pages/9.png
left_page_corners ((133, 433), (142, 1713), (2083, 1696), (2077, 416))
saved to boo

# Crop

In [None]:
def shrink_page(grid):
    """Shrinks the page to the smallest possible rectangle that contains the tree."""
    rows, cols = grid.shape

    def find_shrink_start(arr):
        mid = int(len(arr) / 2)
        search_range = arr[mid - 150 : mid + 150]
        max_idx = np.argmax(search_range)
        return mid - 150 + max_idx

    # Cut left/right
    col_present = np.sum(1 - grid, axis=0)
    min_col = find_shrink_start(col_present)
    max_col = min_col
    while min_col > 0 and col_present[min_col]:
        min_col -= 1
    while max_col < cols - 1 and col_present[max_col]:
        max_col += 1
    min_col = max(min_col - 10, 0)
    max_col = min(max_col + 10, cols - 1)
    print("Min and max cols")
    print(min_col, max_col)

    grid = grid[:, min_col : max_col + 1]

    # Cut top/bottom
    row_present = np.sum(1 - grid, axis=1)
    min_row = 0
    # while not row_present[min_row]:
    #   min_row += 1
    # min_row = max(min_row - 10, 0)

    max_row = row_present.shape[0] - 1
    while not row_present[max_row]:
        max_row -= 1
    max_row = min(max_row + 10, rows - 1)
    print("Min and max rows")
    print(min_row, max_row)
    grid = grid[:max_row, :]

    return grid

In [None]:
PATH_TEMPLATE = "book1/pages/{}.png"
for i in range(17):
    print(f"i: {i}")
    filepath = PATH_TEMPLATE.format(i)
    grid = get_image(filepath)
    rows, cols = grid.shape
    if i % 2 == 0:
        grid = grid[50 : rows - 50, 150 : cols - 50]
    else:
        grid = grid[50 : rows - 50, 50 : cols - 150]
    grid = shrink_page(grid)
    save_image(grid, f"book1/cropped/{i}.png")

i: 0
Min and max cols
360 637
Min and max rows
0 1699
saved to book1/cropped/0.png
i: 1
Min and max cols
512 686
Min and max rows
0 1708
saved to book1/cropped/1.png
i: 2
Min and max cols
359 638
Min and max rows
0 1702
saved to book1/cropped/2.png
i: 3
Min and max cols
308 891
Min and max rows
0 1704
saved to book1/cropped/3.png
i: 4
Min and max cols
360 637
Min and max rows
0 1702
saved to book1/cropped/4.png
i: 5
Min and max cols
462 738
Min and max rows
0 1754
saved to book1/cropped/5.png
i: 6
Min and max cols
309 688
Min and max rows
0 1758
saved to book1/cropped/6.png
i: 7
Min and max cols
411 790
Min and max rows
0 1756
saved to book1/cropped/7.png
i: 8
Min and max cols
0 999
Min and max rows
0 1755
saved to book1/cropped/8.png
i: 9
Min and max cols
565 636
Min and max rows
0 434
saved to book1/cropped/9.png
i: 10
Min and max cols
463 536
Min and max rows
0 440
saved to book1/cropped/10.png
i: 11
Min and max cols
514 688
Min and max rows
0 440
saved to book1/cropped/11.png
i: 12

  img = Image.fromarray(grid * 255, mode='L')


# Graph parsing heuristic

In [212]:
def find_lines(image, threshold=70):
  """
  Find connected components in a binary image that span more than 70 pixels
  in either width or height.
  
  Args:
    image: 2D list where 0 = black pixel, 1 = white pixel
    
  Returns:
    List of sets, where each set contains (row, col) tuples of pixels
    in a large connected component
  """
  if len(image) == 0 or len(image[0]) == 0:
    return []
  
  image = image.copy()
  rows = len(image)
  cols = len(image[0])
  results = []
  
  # Iterate through entire array
  for i in range(rows):
    for j in range(cols):
      # If we find a black pixel (0)
      if image[i][j] == 0:
        # BFS to find connected component
        component = set()
        queue = [(i, j)]
        image[i][j] = 1  # Mark as visited
        
        min_x = max_x = i
        min_y = max_y = j
        
        while queue:
          x, y = queue.pop(0)
          component.add((x, y))
          
          # Update bounds
          min_x = min(min_x, x)
          max_x = max(max_x, x)
          min_y = min(min_y, y)
          max_y = max(max_y, y)
          
          # Check four direct directions
          for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            nx, ny = x + dx, y + dy
            
            # Check bounds and if pixel is black
            if (0 <= nx < rows and 0 <= ny < cols and 
              image[nx][ny] == 0):
              image[nx][ny] = 1  # Mark as visited
              queue.append((nx, ny))
        
        # Check if component is large enough
        if max_x - min_x > threshold or max_y - min_y > threshold:
          results.append(component)
  
  return results

def find_line_ends(points):
  """
  Find the endpoints of a line represented by a set of points.
  
  Args:
    points: Set of (x, y) tuples representing a connected component
    
  Returns:
    Tuple of two sets: (left_end_ys, right_end_ys)
    Each set contains y-coordinates of endpoints, with adjacent values removed
  """
  if not points:
    return (set(), set())
  
  # Find minimum x coordinate
  min_x = min(x for x, y in points)
  
  # Get y coordinates near the left edge (within 5 of min_x)
  left_ys = {y for x, y in points if abs(x - min_x) <= 5}
  
  # Remove adjacent numbers
  left_ys_filtered = remove_adjacent(left_ys)
  
  # Find maximum x coordinate
  max_x = max(x for x, y in points)
  
  # Get y coordinates near the right edge (within 5 of max_x)
  right_ys = {y for x, y in points if abs(x - max_x) <= 5}
  
  # Remove adjacent numbers
  right_ys_filtered = remove_adjacent(right_ys)
  
  min_x_points = [(min_x, y) for y in sorted(left_ys_filtered)]
  max_x_points = [(max_x, y) for y in sorted(right_ys_filtered)]
  return (min_x_points, max_x_points)


def remove_adjacent(numbers, threshold=30):
  """
  Remove adjacent numbers from a set, keeping only the first of each sequence.
  For example: {4, 6, 7, 8} becomes {4, 7}
  
  Args:
    numbers: Set of integers
    
  Returns:
    Set with adjacent numbers removed
  """
  if not numbers:
    return set()
  
  sorted_nums = sorted(numbers)
  result = {sorted_nums[0]}
  
  for i in range(1, len(sorted_nums)):
    # If current number is not adjacent to previous, keep it
    if sorted_nums[i] - sorted_nums[i-1] > threshold:
      result.add(sorted_nums[i])
  
  return result


In [229]:
# grid = get_image("Untitled.jpg")  
grid = get_image("book1/cropped/13.png")
raw_results = find_lines(grid)
results = []
for raw_result in raw_results:
  parents, children = find_line_ends(raw_result)
  if len(parents) != 1:
    raise ValueError("More than one parent found", parents)
  parent = parents[0]
  results.append((parent, children))
  print(parent, children)

(134, 877) [(371, 878)]
(450, 877) [(682, 877)]
(763, 877) [(997, 572), (997, 878)]
(1183, 568) [(1320, 61), (1320, 161), (1320, 261), (1320, 568)]
(1183, 882) [(1320, 882)]
(1438, 569) [(1628, 370), (1628, 468), (1628, 570)]
(1447, 161) [(1634, 160)]
(1447, 261) [(1635, 260)]
(1501, 882) [(1635, 673), (1635, 780), (1635, 881)]


In [230]:
import dataclasses
from typing import List, Optional, Tuple

@dataclasses.dataclass
class Node:
  top: Optional[Tuple[int, int]] = None
  bot: Optional[Tuple[int, int]] = None
  children: List[Tuple[int, int]] = dataclasses.field(default_factory=list)
  def __str__(self):
    return f"Node(top={self.top}, bot={self.bot})"

nodes = []
for parent, children in results:
  pn = Node(bot=parent)
  nodes.append(pn)
  for c in children:
    cn = Node(top=c)
    pn.children.append(cn)
    nodes.append(cn)

while True:
  made_progress = False
  for i in range(len(nodes)):
    for j in range(len(nodes)):
      if i == j:
        continue
      p = nodes[i]
      c = nodes[j]
      # print(p, c)
      # Only try the connection is p is missing bot and c is missing top
      if p.bot or c.top:
        continue
      # print("valid")
      tx, ty = p.top
      bx, by = c.bot
      if 0 < bx - tx < 200 and abs(by - ty) < 20:
        # print("merge")
        # Same node, merge
        p.bot = c.bot
        p.children = c.children
        del nodes[j]
        made_progress = True
        break
    if made_progress:
      break
  if not made_progress:
    break

print(len(nodes))


18


In [231]:
def infer_ends(nodes, grid):
  for n in nodes:
    if n.top is None:
      top = max(0, n.bot[0] - 200)
      y = n.bot[1] 
      while top < n.bot[0] and 0 not in grid[top][y-30:y+30]:
        top += 1
      top = max(0, top - 10)
      n.top = (top, y)
    if n.bot is None:
      bot = min(grid.shape[0] - 1, n.top[0] + 200)
      y = n.top[1]
      while bot > n.top[0] and 0 not in grid[bot][y-30:y+30]:
        bot -= 1
      bot = min(bot + 10, grid.shape[0] - 1)
      n.bot = (bot, y)

In [232]:
infer_ends(nodes, grid)

In [233]:
print(nodes)

[Node(top=(57, 877), bot=(134, 877), children=[Node(top=(371, 878), bot=(450, 877), children=[Node(top=(682, 877), bot=(763, 877), children=[Node(top=(997, 572), bot=(1183, 568), children=[Node(top=(1320, 61), bot=(1438, 61), children=[]), Node(top=(1320, 161), bot=(1447, 161), children=[Node(top=(1634, 160), bot=(1759, 160), children=[])]), Node(top=(1320, 261), bot=(1447, 261), children=[Node(top=(1635, 260), bot=(1758, 260), children=[])]), Node(top=(1320, 568), bot=(1438, 569), children=[Node(top=(1628, 370), bot=(1758, 370), children=[]), Node(top=(1628, 468), bot=(1759, 468), children=[]), Node(top=(1628, 570), bot=(1759, 570), children=[])])]), Node(top=(997, 878), bot=(1183, 882), children=[Node(top=(1320, 882), bot=(1501, 882), children=[Node(top=(1635, 673), bot=(1811, 673), children=[]), Node(top=(1635, 780), bot=(1811, 780), children=[]), Node(top=(1635, 881), bot=(1760, 881), children=[])])])])])]), Node(top=(371, 878), bot=(450, 877), children=[Node(top=(682, 877), bot=(7

In [234]:
def print_trees(nodes):
  def print_tree(n, indent):
    print("  " * indent + str(n))
    for c in n.children:
      print_tree(c, indent + 1)

  children = []
  for n in nodes:
    for c in n.children:
      children.append(c)
  for n in nodes:
    if n not in children:
      print_tree(n, 0)

In [235]:
print_trees(nodes)

Node(top=(57, 877), bot=(134, 877))
  Node(top=(371, 878), bot=(450, 877))
    Node(top=(682, 877), bot=(763, 877))
      Node(top=(997, 572), bot=(1183, 568))
        Node(top=(1320, 61), bot=(1438, 61))
        Node(top=(1320, 161), bot=(1447, 161))
          Node(top=(1634, 160), bot=(1759, 160))
        Node(top=(1320, 261), bot=(1447, 261))
          Node(top=(1635, 260), bot=(1758, 260))
        Node(top=(1320, 568), bot=(1438, 569))
          Node(top=(1628, 370), bot=(1758, 370))
          Node(top=(1628, 468), bot=(1759, 468))
          Node(top=(1628, 570), bot=(1759, 570))
      Node(top=(997, 878), bot=(1183, 882))
        Node(top=(1320, 882), bot=(1501, 882))
          Node(top=(1635, 673), bot=(1811, 673))
          Node(top=(1635, 780), bot=(1811, 780))
          Node(top=(1635, 881), bot=(1760, 881))


In [236]:
def get_name_image(node, grid):
  y = node.top[1]
  name = grid[node.top[0]+5:node.bot[0]-5, max(0, y-40):min(grid.shape[1], y+40)]

  col_present = np.sum(1 - name, axis=0)
  min_y = 0
  while not col_present[min_y]:
    min_y += 1
  min_y = max(min_y - 10, 0)
  max_y = len(col_present) - 1
  while not col_present[max_y]:
    max_y -= 1
  max_y = min(max_y + 10, len(col_present) - 1)
  name = name[:, min_y:max_y]
  print("min_y", min_y, "max_y", max_y)

  row_present = np.sum(1 - name, axis=1)
  min_x = 0
  while not row_present[min_x]:
    min_x += 1
  min_x = max(min_x - 10, 0)
  max_x = len(row_present) - 1
  while not row_present[max_x]:
    max_x -= 1
  max_x = min(max_x + 10, len(row_present) - 1)
  name = name[min_x:max_x, :]
  print("min_x", min_x, "max_x", max_x)
  return name


In [248]:
import json

def sort_nodes(nodes: List[Node]) -> List[Node]:
    def compare_key(node):
        return (node.top[0], node.top[1])
    
    # First, sort by both top[0] and top[1] as a baseline
    nodes_sorted = sorted(nodes, key=compare_key)
    
    # Now apply the custom logic
    result = []
    i = 0
    while i < len(nodes_sorted):
        # Collect nodes that should be grouped together
        group = [nodes_sorted[i]]
        j = i + 1
        
        while j < len(nodes_sorted):
            # Check if top[0] values are within 60 of each other
            if abs(nodes_sorted[j].top[0] - nodes_sorted[i].top[0]) < 60:
                group.append(nodes_sorted[j])
                j += 1
            else:
                break
        
        # Sort the group by top[1]
        group.sort(key=lambda node: node.top[1])
        result.extend(group)
        i = j
    
    return result

nodes = sort_nodes(nodes)

idx = 0
tree = {}
node_to_idx = {(n.top, n.bot): idx for idx, n in enumerate(nodes)}
for n in nodes:
  print(n)
  tree[node_to_idx[(n.top, n.bot)]] = [node_to_idx[(c.top, c.bot)] for c in n.children]
  name_img = get_name_image(n, grid)
  save_image(name_img, f"book1/names/13_{idx}.png")
  idx += 1
json.dump(tree, open("book1/names/13_tree.json", "w"))

Node(top=(57, 877), bot=(134, 877))
min_y 9 max_y 79
min_x 0 max_x 61
saved to book1/names/13_0.png
Node(top=(371, 878), bot=(450, 877))
min_y 13 max_y 76
min_x 0 max_x 65
saved to book1/names/13_1.png
Node(top=(682, 877), bot=(763, 877))
min_y 16 max_y 78
min_x 9 max_x 63
saved to book1/names/13_2.png
Node(top=(997, 572), bot=(1183, 568))
min_y 8 max_y 71
min_x 2 max_x 173
saved to book1/names/13_3.png
Node(top=(997, 878), bot=(1183, 882))
min_y 11 max_y 76
min_x 2 max_x 173
saved to book1/names/13_4.png
Node(top=(1320, 61), bot=(1438, 61))
min_y 1 max_y 71
min_x 0 max_x 107
saved to book1/names/13_5.png
Node(top=(1320, 161), bot=(1447, 161))
min_y 6 max_y 73
min_x 0 max_x 113
saved to book1/names/13_6.png
Node(top=(1320, 261), bot=(1447, 261))
min_y 9 max_y 76
min_x 0 max_x 113
saved to book1/names/13_7.png
Node(top=(1320, 568), bot=(1438, 569))
min_y 11 max_y 78
min_x 0 max_x 107
saved to book1/names/13_8.png
Node(top=(1320, 882), bot=(1501, 882))
min_y 5 max_y 70
min_x 0 max_x 169


  img = Image.fromarray(grid * 255, mode='L')


In [None]:
# TODO: Merge adjacent pages if their lines go off the edge (and if they're missing the tag). Warn on orphans

# Graph parsing opencv

In [102]:
import cv2
import math

img = get_image("book1/cropped/0.png")
lines = cv2.HoughLinesP(
    1-img,
    rho=1,
    theta=math.pi/180,
    threshold=10,
    minLineLength=60,
    maxLineGap=10
)

In [103]:
lines = lines.squeeze().tolist()
# Swap coordinates if x1+y1 > x2+y2 to ensure consistent ordering
for line in lines:
    line[0], line[1], line[2], line[3] = line[1], line[0], line[3], line[2]
    if line[0] + line[1] > line[2] + line[3]:
        line[0], line[1], line[2], line[3] = line[2], line[3], line[0], line[1]

lines = sorted(lines, key=lambda x: x[0])

def deduplicate_lines(lines, threshold=10):
  """
  Deduplicate a 2D list where entries are considered duplicates
  if all corresponding values are within the threshold.
  """
  if not lines:
    return []
  
  deduplicated = []
  
  for line in lines:
    is_duplicate = False
    for existing in deduplicated:
      # Check if all corresponding values are within threshold
      if all(abs(line[i] - existing[i]) <= threshold for i in range(len(line))):
        is_duplicate = True
        break
    
    if not is_duplicate:
      deduplicated.append(line)
  
  return deduplicated

lines = deduplicate_lines(lines)
for line in lines:
  line2 = [line[1], line[0], line[3], line[2]]
  print(line2)

[242, 242, 241, 319]
[242, 288, 241, 367]
[241, 445, 241, 680]
[40, 544, 103, 544]
[35, 545, 35, 681]
[34, 545, 242, 545]
[64, 546, 140, 546]
[140, 600, 138, 669]
[140, 615, 138, 680]
[242, 808, 241, 921]
[242, 870, 241, 960]
[242, 1072, 241, 1133]
[242, 1077, 241, 1190]
[242, 1150, 242, 1215]
[242, 1235, 241, 1306]
[242, 1389, 241, 1502]


In [104]:
print(lines.shape)

AttributeError: 'list' object has no attribute 'shape'

In [None]:
for (x1, y1, x2, y2) in lines[:, 0]:
    cv2.line(img, (x1, y1), (x2, y2), (0,0,255), 1)

In [None]:
show_image(1-img)

In [None]:
print(stats)
print(centroids)
print(num_labels)
print(labels)

[[    11     64    256   1626   7392]
 [     0      0    278   1699 464215]
 [   233     84     13      9     83]
 [   138    710     11     14    124]
 [   126    712      9     15    104]
 [   233   1019      7     10     50]
 [   224   1029     30     18    354]]
[[ 203.32589286  817.2463474 ]
 [ 137.36288358  849.54353478]
 [ 238.96385542   88.01204819]
 [ 143.08064516  716.49193548]
 [ 130.08653846  718.77884615]
 [ 236.78       1023.76      ]
 [ 239.42655367 1037.71186441]]
7
[[1 1 1 ... 1 1 1]
 [1 1 1 ... 1 1 1]
 [1 1 1 ... 1 1 1]
 ...
 [1 1 1 ... 1 1 1]
 [1 1 1 ... 1 1 1]
 [1 1 1 ... 1 1 1]]


In [None]:
for i, stat in enumerate(stats):
    x, y, w, h, area = stat
    if area > 50:  # adjust threshold
        name_img = img[y:y+h, x:x+w]
        save_image(name_img, f"names/name_{i}.png")

saved to names/name_0.png
saved to names/name_1.png
saved to names/name_2.png
saved to names/name_3.png
saved to names/name_4.png
saved to names/name_6.png


  img = Image.fromarray(grid * 255, mode='L')


# attempt 2

In [None]:
img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)

# Optional blur to reduce noise
blur = cv2.GaussianBlur(img, (5, 5), 0)

# Binary inverse threshold — black border becomes white
_, thresh = cv2.threshold(blur, 50, 255, cv2.THRESH_BINARY_INV)

# Find contours
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Sort contours by area (largest first)
contours = sorted(contours, key=cv2.contourArea, reverse=True)

In [None]:
print(len(contours))

In [None]:
page_corners = []

for cnt in contours[:2]:  # take top 2 (two pages)
    # Approximate contour to polygon
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
    print(approx)
    print(len(approx))

    if len(approx) == 4:
        corners = approx.reshape(4, 2)
        page_corners.append(corners)

# Visualize results
vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for corners in page_corners:
    for x, y in corners:
        cv2.circle(vis, (int(x), int(y)), 10, (0, 0, 255), -1)

cv2.imwrite("detected_corners.png", vis)


# Sort each corner set to TL, TR, BR, BL order
def order_points(pts):
    s = pts.sum(axis=1)
    diff = np.diff(pts, axis=1)
    tl = pts[np.argmin(s)]
    br = pts[np.argmax(s)]
    tr = pts[np.argmin(diff)]
    bl = pts[np.argmax(diff)]
    return np.array([tl, tr, br, bl], dtype=np.float32)


ordered_pages = [order_points(c) for c in page_corners]

for i, corners in enumerate(ordered_pages):
    print(f"Page {i + 1} corners (TL, TR, BR, BL):\n", corners)

In [None]:
print(ordered_pages)

# CSV Parser

In [None]:
import pandas as pd

# Read the CSV file into a DataFrame
df = pd.read_csv("data/zeng_google_sheet.csv", keep_default_na=False)
df["id"] = df["id"].astype(int)
df["name_images"] = [[]] * len(df)
df["generation"] = df["generation"].astype(int)
df["parent"] = df["parent"].astype(int)
df["children"] = df["children"].apply(
    lambda x: [int(child) for child in x.split(",")] if x else []
)

# Iterate through rows and set parent based on children relationships
for _, row in df.iterrows():
    # Get the current node's ID and its children
    node_id = row["id"]
    children = row["children"]

    # For each child, set its parent to the current node_id
    for child_id in children:
        df.loc[df["id"] == child_id, "parent"] = node_id
df

In [None]:
# Export DataFrame to JSONL format
df.to_json("data/book1.jsonl", orient="records", lines=True, force_ascii=False)

In [None]:
import json

with open("data/book1.jsonl", "r") as f:
    records = [json.loads(line) for line in f]

print(records[0])
