# Notes for Possible Improvements
- Grid estimation could be improved by linear regression after assigning each point to appropriate gridline.
    - Estimation using x,y coords of point centers for each horizontal and vertical grid. This would improve the gridline intersection accuracy on deformations and orientation errors.
    - Would increase complexity with e.g. 256 regressions with 16x16 DMC
    - A possibly efficient solution could be to run only 2 or 4 regressions on the DMC alignment pattern and copy their parameters across the other gridlines.
- Simple guaranteed error correction before final decoding process
    - Force L and timing to be black as they are consistent
- Implementation of decoding instead of using pylibdmtx
    - Not done yet because of its complexity (even though the algorithms involved are deterministic)
    - Could possibly "steal" relevant parts from pylibdmtx or libdmtx and rewrite for this purpose
- Rewrites to squeeze out more performance are possible
    - Avoiding later matrix inversion by assigning 1s and 0s in reverse
    - Avoiding multiple sorting by ensuring methods do not reorder points

# Setup

In [None]:
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.optimize import minimize

from pylibdmtx.pylibdmtx import decode

## Yucheng Process

## Yucheng Funcs

In [None]:
def single_scale_retinex(image, sigma=30):
    """
    Split an image into reflectance and illumination with single scale retinex.

    The illumination is a Gaussian-blurred copy of the image and the
    reflectance is the difference of the logarithms of image and illumination.
    Both results are min-max rescaled to 0-255 and returned as uint8.

    Args:
        image: Input image (numpy array)
        sigma: Sigma passed to the Gaussian blur (default is 30)

    Returns:
        Tuple of reflectance and illumination images
    """
    # Offset by one so the logarithm never sees a zero-valued pixel.
    shifted = image.astype(np.float32) + 1.0
    # A (0, 0) kernel size leaves the kernel extent to be derived from sigma.
    blurred = cv2.GaussianBlur(shifted, (0, 0), sigma) + 1.0
    log_ratio = np.log(shifted) - np.log(blurred)

    def to_display(values):
        # Stretch to the full 8-bit range for display.
        return cv2.normalize(values, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    return to_display(log_ratio), to_display(blurred)

In [None]:
def non_max_suppression_fast(boxes, scores, overlap_thresh=0.3):
    """
    Greedy non-maximum suppression over (x, y, width, height) boxes.

    Boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose intersection-over-union with it reaches
    ``overlap_thresh`` is discarded.

    Args:
        boxes: List of bounding boxes (x, y, width, height)
        scores: List of scores, one per bounding box
        overlap_thresh: Overlap at or above which a box is suppressed (default is 0.3)

    Returns:
        Numpy array with the kept boxes in descending score order, or an
        empty list when no boxes were supplied
    """
    if len(boxes) == 0:
        return []

    box_arr = np.array(boxes)
    score_arr = np.array(scores)

    left, top = box_arr[:, 0], box_arr[:, 1]
    right = left + box_arr[:, 2]
    bottom = top + box_arr[:, 3]
    # Areas use the inclusive-pixel (+1) convention, as does the intersection below.
    box_areas = (right - left + 1) * (bottom - top + 1)

    order = score_arr.argsort()[::-1]
    selected = []
    while len(order) > 0:
        best, rest = order[0], order[1:]
        selected.append(best)

        # Intersection rectangle between the current best box and every other candidate.
        inter_w = np.maximum(0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter_area = inter_w * inter_h

        iou = inter_area / (box_areas[best] + box_areas[rest] - inter_area)
        order = rest[iou < overlap_thresh]

    return box_arr[selected]

In [None]:
def extract_dominant_dot_template(image, min_area=20, max_area=300, patch_size=(24, 24), offset=5, size_tol=0.5):
    """
    Extracts the dominant dot template from the image.

    The image is denoised (bilateral filter), contrast-equalised (CLAHE) and
    passed through a black-hat transform before Otsu thresholding. Every
    external contour whose area lies strictly between ``min_area`` and
    ``max_area`` yields a candidate patch (bounding box grown by ``offset``
    pixels on each side, cut from the unprocessed input image). Candidates
    whose bounding-box area is close to the median are resized to
    ``patch_size`` and the one nearest to their pixel-wise median is returned.

    Args:
        image: Input image (numpy array); assumed single-channel, since it is fed to CLAHE
        min_area: Minimum area of the dot to be considered (default is 20)
        max_area: Maximum area of the dot to be considered (default is 300)
        patch_size: Size candidates are resized to for comparison (default is (24, 24))
        offset: Offset for bounding box around the detected dot (default is 5)
        size_tol: Allowed relative deviation from the median bounding-box area (default is 0.5)

    Returns:
        Tuple of the selected patch (at its original, un-resized size) and
        all external contours found in the thresholded image.

    Raises:
        ValueError: If no valid dot candidates are found or if no size-consistent patches are found.
    """
    image_clean = cv2.bilateralFilter(image, d=15, sigmaColor=50, sigmaSpace=5)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    image_clean = clahe.apply(image_clean)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (16, 16))
    blackhat = cv2.morphologyEx(image_clean, cv2.MORPH_BLACKHAT, kernel)

    _, binary = cv2.threshold(blackhat, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # numpy shapes are (rows, cols), i.e. (height, width) -- not (width, height).
    img_h, img_w = image.shape[:2]

    candidates = []
    for cnt in contours:
        if not min_area < cv2.contourArea(cnt) < max_area:
            continue

        x, y, w, h = cv2.boundingRect(cnt)
        crop_x_start, crop_x_end = x - offset, x + w + offset
        crop_y_start, crop_y_end = y - offset, y + h + offset

        # Skip crops that would leave (or touch the far edge of) the image.
        if crop_x_start < 0 or crop_x_end >= img_w or crop_y_start < 0 or crop_y_end >= img_h:
            continue

        candidates.append((image[crop_y_start:crop_y_end, crop_x_start:crop_x_end], h, w))

    if not candidates:
        raise ValueError("No valid dot candidates found.")

    # Reference size: product of the median bounding-box height and width.
    median_area = np.median([h for _, h, _ in candidates]) * np.median([w for _, _, w in candidates])

    # Keep only patches with similar size
    patches_filtered = [
        patch for patch, h, w in candidates
        if abs(h * w - median_area) / median_area < size_tol
    ]
    if not patches_filtered:
        raise ValueError("No size-consistent patches found.")

    # Patches differ in size, so compare them on a common grid.
    resized_for_matching = [cv2.resize(patch, patch_size) for patch in patches_filtered]

    # Find patch closest to the median template
    stack = np.stack(resized_for_matching, axis=0).astype(np.float32)
    median_template = np.median(stack, axis=0)
    diffs = [np.linalg.norm(p.astype(np.float32) - median_template) for p in resized_for_matching]
    best_idx = np.argmin(diffs)

    return patches_filtered[best_idx], contours

In [None]:
def contours_from_patch(patch):
    """
    Return the external contours of a patch after Otsu binarisation.

    Args:
        patch: Patch image (numpy array) to threshold

    Returns:
        Contours found by cv2.findContours on the thresholded patch
    """
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _threshold_used, thresholded = cv2.threshold(patch, 0, 255, otsu_flags)
    found, _hierarchy = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found

In [None]:
def display_image(image, size=(300, 300)):
    """
    Displays the numpy image using PIL and notebook display functionality.

    Colour images are treated as BGR (OpenCV order) and converted to RGB
    before display; 2-D grayscale images are shown as they are.

    Args:
        image: Input image (numpy array), grayscale or BGR
        size: Size to which the image should be resized (default is (300, 300))
    
    Returns:
        None
    """
    # COLOR_BGR2RGB needs a colour image; a 2-D grayscale array has no
    # channels to swap and would make cvtColor raise.
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, size)
    pil_image = Image.fromarray(image)
    # `display` is the notebook-provided display function.
    display(pil_image)

In [None]:
def display_yucheng_methods(nms_boxes, reflectance, dot_contours, img, illumination, dot_template):
    """
    Show a 2x3 figure summarising the dot detection and template matching stages.

    Top row: original image, estimated illumination, reflectance map.
    Bottom row: contours drawn over the reflectance, the dot template, and the
    reflectance with one rectangle per box that survived non-maximum suppression.

    Args:
        nms_boxes: Bounding boxes (x, y, w, h) after non-maximum suppression
        reflectance: Reflectance map (grayscale numpy array)
        dot_contours: Contours of the detected dots
        img: Original image (numpy array)
        illumination: Estimated illumination (numpy array)
        dot_template: Dot template (numpy array)

    Returns:
        None
    """
    green_bgr, red_bgr = (0, 255, 0), (0, 0, 255)

    # Matching overlay: rectangles on a colour copy of the reflectance.
    match_vis = cv2.cvtColor(reflectance, cv2.COLOR_GRAY2BGR)
    for (bx, by, bw, bh) in nms_boxes:
        cv2.rectangle(match_vis, (bx, by), (bx + bw, by + bh), green_bgr, 2)

    # Contour overlay: contours on a second colour copy of the reflectance.
    contour_vis = cv2.cvtColor(reflectance, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(contour_vis, dot_contours, -1, red_bgr, 1)

    gray = {"cmap": "gray"}
    # One entry per subplot, in row-major order: (picture, title, imshow kwargs).
    panels = [
        (img, "Original Image", gray),
        (illumination, "Estimated Illumination", gray),
        (reflectance, "Reflectance Map (SSR)", gray),
        (cv2.cvtColor(contour_vis, cv2.COLOR_BGR2RGB), "Dot Contours", {}),
        (dot_template, "Dot template (median of patches)", gray),
        (cv2.cvtColor(match_vis, cv2.COLOR_BGR2RGB), "Template matching", {}),
    ]

    fig, axs = plt.subplots(2, 3, figsize=(10, 6))
    for ax, (picture, title, imshow_kwargs) in zip(axs.flat, panels):
        ax.imshow(picture, **imshow_kwargs)
        ax.set_title(title)
        ax.axis("off")

    plt.tight_layout()
    plt.show()

## Yucheng Use

In [None]:
# === Load image (grayscale) ===
img_to_test = "../data/delete.jpg"
template_to_test = "../data/delete_template.jpg"
img = cv2.imread(img_to_test, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports an unreadable file by returning None instead of raising,
# so fail here with the offending path rather than inside cv2.resize.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_to_test}")
img = cv2.resize(img, (320, 320))
dot_template = cv2.imread(template_to_test, cv2.IMREAD_GRAYSCALE)
if dot_template is None:
    raise FileNotFoundError(f"Could not read template: {template_to_test}")
dot_template = cv2.resize(dot_template, (19, 19))

In [None]:
# === Apply Retinex ===
# Split the image into reflectance and illumination; both come back as uint8 images rescaled to 0-255.
reflectance, illumination = single_scale_retinex(img, sigma=64)

In [None]:
# External contours of the Otsu-thresholded template; in this notebook they are only passed to the debug figure.
dot_contours = contours_from_patch(dot_template)

In [None]:
# # === Extract dominant template and contours ===
# # REPLACE WITH YOUR ACTUAL TEMPLATE
# dot_template, dot_contours = extract_dominant_dot_template(reflectance,
#                                                            min_area=20,
#                                                            max_area=300,
#                                                            patch_size=(24, 24),
#                                                            offset=5,
#                                                            size_tol=0.5)

# display_image(dot_template)

In [None]:
# === Template matching ===
result = cv2.matchTemplate(reflectance, dot_template, cv2.TM_CCOEFF_NORMED)
threshold = 0.7
# np.where yields (rows, cols); reversing gives (x, y) pairs in the same
# row-major order as `scores` below. Materialised as a list so the cells that
# consume `locations` can be re-run without re-running this one (a bare zip
# iterator would be empty the second time).
locations = list(zip(*np.where(result >= threshold)[::-1]))
scores = result[result >= threshold].flatten()

In [None]:
# === Bounding boxes (x, y, w, h) for each match ===
# Every match location becomes a box of the template's size anchored at that (x, y).
h, w = dot_template.shape  # numpy shape order is (height, width)
boxes = [(int(x), int(y), w, h) for (x, y) in locations]

In [None]:
# === Apply NMS ===
# Collapse overlapping detections; the highest-scoring box of each overlapping group is kept.
nms_boxes = non_max_suppression_fast(boxes, scores, overlap_thresh=0.3)

In [None]:
# Show the six-panel debug figure for the detection stage.
display_yucheng_methods(nms_boxes, reflectance, dot_contours, img, illumination, dot_template)

# Decoding

## Decoding Funcs

In [None]:
def generate_grid_affine(x0, y0, a, b, c, d, grid_size=16):
    """
    Build a grid_size x grid_size lattice and map it through an affine transform.

    Args:
        x0, y0: Translation added to every transformed point
        a, b, c, d: Entries of the 2x2 matrix [[a, b], [c, d]] applied to each (i, j) index
        grid_size: Number of grid positions along each axis (default is 16)

    Returns:
        Tuple of (transformed points, (i, j) indices), both with one row per
        grid position and ordered with i as the outer and j as the inner index
    """
    ij_indices = np.array([(row, col) for row in range(grid_size) for col in range(grid_size)])
    linear = np.array([[a, b], [c, d]])
    origin = np.array([x0, y0])
    grid_xy = (linear @ ij_indices.T).T + origin
    return grid_xy, ij_indices

def invert_affine(p, x0, y0, a, b, c, d):
    """
    Map a point back through the affine transform to fractional grid indices.

    Args:
        p: Point (x, y) in image coordinates
        x0, y0: Translation of the affine transform
        a, b, c, d: Entries of the 2x2 matrix [[a, b], [c, d]] of the transform

    Returns:
        Numpy array holding the (i, j) grid coordinates of the point (not rounded)
    """
    origin = np.array([x0, y0])
    inverse = np.linalg.inv(np.array([[a, b], [c, d]]))
    grid_ij = inverse @ (p - origin)
    return grid_ij.T

def cost(params, observed_points):
    """
    Objective for the grid fit: mean clipped squared distance to the nearest grid point.

    Args:
        params: Sequence (x0, y0, a, b, c, d) describing the affine grid
        observed_points: Array of observed (x, y) points, one per row

    Returns:
        Mean over all observed points of min(squared distance to the nearest grid point, 10.0)
    """
    x0, y0, a, b, c, d = params
    lattice = generate_grid_affine(x0, y0, a, b, c, d)[0]
    # Pairwise distances: one row per observed point, one column per grid point.
    offsets = observed_points[:, None, :] - lattice[None, :, :]
    nearest = np.linalg.norm(offsets, axis=2).min(axis=1)
    # Clipping at 10.0 bounds the influence of points far from every grid position.
    clipped = np.minimum(nearest**2, 10.0)  # robust loss
    return np.mean(clipped)

def show_grid(img, grid_pts):
    """
    Plot the image with a filled circle drawn at every grid point.

    Args:
        img: Image to draw on (numpy array); it is not modified
        grid_pts: Iterable of (x, y) grid point coordinates

    Returns:
        None
    """
    # Draw on a copy: cv2.circle writes into the array it is given, and the
    # caller's image may still be needed unmarked (e.g. when re-running earlier steps).
    canvas = img.copy()
    for p in grid_pts:
        cv2.circle(canvas, (int(p[0]), int(p[1])), 3, (0, 255, 0), -1)
    plt.imshow(canvas, cmap='gray')
    plt.title("Grid Points")
    plt.axis("off")
    plt.show()

def estimate_grid(nms_boxes):
    """
    Fit an affine grid to the centres of the detected boxes.

    The box centre with the smallest x + y is used as the initial grid origin,
    with an initial axis-aligned spacing of 18 pixels; the six affine
    parameters are then refined by minimising ``cost`` with Powell's method.

    Args:
        nms_boxes: Bounding boxes (x, y, w, h), e.g. the output of non-maximum suppression

    Returns:
        Tuple (x0, y0, a, b, c, d, observed_pts): the optimised affine
        parameters followed by the array of box centres used for the fit

    Raises:
        ValueError: If nms_boxes is empty
    """
    # Without detections there is nothing to fit; fail here with a clear
    # message rather than with an indexing error inside the cost function.
    if len(nms_boxes) == 0:
        raise ValueError("Cannot estimate a grid without any detected boxes.")

    observed_pts = np.array([[x + w / 2, y + h / 2] for (x, y, w, h) in nms_boxes])

    # x, y based on most left top corner (centre with smallest x + y);
    # argmin returns the first minimum, so ties resolve to the earliest box.
    corner = observed_pts[np.argmin(observed_pts.sum(axis=1))]
    x, y = float(corner[0]), float(corner[1])

    print(f"Estimated top left corner: ({x}, {y})")
    init_params = [x, y, 18, 0, 0, 18]  # reasonable guess
    result = minimize(cost, init_params, args=(observed_pts,), method='Powell')
    x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt = result.x

    return x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt, observed_pts

def plot_grid_and_observed(grid_pts, observed_pts, ij_valid, observed_valid, x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt):
    """
    Plot the fitted grid, the observed dots and the assignment between them.

    Args:
        grid_pts: Grid point coordinates to show in light gray
        observed_pts: All observed dot centres (x, y)
        ij_valid: Integer (i, j) grid indices of the dots that fall inside the grid
        observed_valid: Dot centres corresponding row by row to ij_valid
        x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt: Fitted affine grid parameters

    Returns:
        None
    """
    grid_size = 16

    plt.figure(figsize=(10, 10))

    # Plot full grid
    plt.scatter(grid_pts[:,0], grid_pts[:,1], color='lightgray', label='Grid Points', s=10)

    # Plot observed dots
    plt.scatter(observed_pts[:,0], observed_pts[:,1], color='blue', label='Observed Dots')

    # The fitted grid is the same for every dot, so build it once up front.
    fitted_grid, _ = generate_grid_affine(x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt, grid_size=grid_size)

    # Connect observed to nearest estimated grid point
    for pt, (i, j) in zip(observed_valid, ij_valid):
        # Grid rows are ordered with i as the outer and j as the inner index.
        grid_xy = fitted_grid[i * grid_size + j]
        plt.plot([pt[0], grid_xy[0]], [pt[1], grid_xy[1]], 'r-', alpha=0.3)

    # Annotate with grid indices
    for pt, (i, j) in zip(observed_valid, ij_valid):
        plt.text(pt[0]+0.5, pt[1]+0.5, f'({i},{j})', fontsize=8, color='green')

    plt.legend()
    plt.axis('equal')
    plt.title('Observed Points Mapped to Grid Indices')
    plt.show()

In [None]:
def display_DMC(matrix):
    """
    Show the DMC matrix as an image with set modules drawn dark.

    Args:
        matrix: Numpy array representing the DMC matrix
    """
    # The gray colormap draws low values dark, so flip the values first
    # to make the set modules come out black.
    flipped = np.bitwise_not(matrix)
    plt.imshow(flipped, cmap='gray', interpolation='nearest')
    plt.title("DMC Matrix")
    plt.axis("off")
    plt.show()

In [None]:
def decode_DMC(matrix):
    """
    Decodes the DMC matrix using pylibdmtx.

    The matrix is rendered as a black-on-white image (entries equal to 1 or
    True become black), surrounded by a white margin, enlarged tenfold and
    handed to pylibdmtx.

    Args:
        matrix: Numpy array representing the DMC matrix
    
    Returns:
        The payload of the first decoded symbol as a UTF-8 string, or None if
        pylibdmtx does not find a symbol.
    """
    # Set modules -> 0 (black), everything else -> 255 (white).
    pixels = np.where(matrix == 1, 0, 255).astype(np.uint8)

    # Padding the image by 2 pixels to add margin larger than a DMC module (https://www.keyence.eu/ss/products/auto_id/codereader/basic_2d/datamatrix.jsp)
    pixels = np.pad(pixels, 2, mode='constant', constant_values=255)

    # The 8-bit grayscale mode is inferred from the 2-D uint8 array.
    image = Image.fromarray(pixels)

    # Resizing to larger image for better decoding; nearest-neighbour keeps module edges sharp
    image = image.resize((image.size[0] * 10, image.size[1] * 10), Image.NEAREST)

    # Decode using pylibdmtx
    decoded = decode(image)
    if decoded:
        return decoded[0].data.decode('utf-8')
    return None

## Decoding Use

In [None]:
# === Estimate grid and plot ===
# Fit the affine grid (origin x0, y0 and matrix entries a, b, c, d) to the centres of the detected boxes.
x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt, observed_pts = estimate_grid(nms_boxes)

# Regenerate the full 16x16 lattice from the fitted parameters and show it on top of the image.
grid_pts, grid_indices = generate_grid_affine(x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt)
show_grid(img, grid_pts)

In [None]:
# Invert each observed point to get (i, j)
# The result is fractional: one row of grid coordinates per observed dot centre.
ij_estimates = np.array([invert_affine(p, x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt) for p in observed_pts])

# Round to nearest integer to get index estimate
ij_rounded = np.round(ij_estimates).astype(int)

# Keep only valid indices (within grid bounds)
# Dots that round to an index outside 0..15 on either axis are dropped.
valid_mask = np.all((ij_rounded >= 0) & (ij_rounded < 16), axis=1)
ij_valid = ij_rounded[valid_mask] # valid indices (what we use for decoding!!!)
observed_valid = observed_pts[valid_mask]  # the matching dot centres, row for row

# Will look flipped and rotated
plot_grid_and_observed(grid_pts, observed_pts, ij_valid, observed_valid, x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt)

In [None]:
# === Decode the Data Matrix ===
# Get filled grid points
# Several dots rounding to the same (i, j) simply mark that cell once.
filled_grid = np.zeros((16, 16), dtype=bool)
for i, j in ij_valid:
    filled_grid[i, j] = True

# flip grid vertically
filled_grid_flipped = np.flipud(filled_grid)
# rotate grid 90 deg clockwise
# NOTE: the vertical flip followed by this rotation is equivalent to a transpose of filled_grid.
filled_grid_rotated = np.rot90(filled_grid_flipped, k=-1)
display_DMC(filled_grid_rotated)

In [None]:
# === Decoding DMC ===
# decode_DMC returns the decoded text, or None when pylibdmtx finds nothing.
decoded_data = decode_DMC(filled_grid_rotated)
print(decoded_data)

# Full Decoding Pipeline

In [None]:
def decode_pipeline(image_path, template_path, debug=False, rotation=None):
    """
    Performs the entire decoding pipeline on the input image and template.

    Steps: load and resize both images, optionally rotate the input, compute
    the retinex reflectance, locate dots by template matching followed by
    non-maximum suppression, fit an affine 16x16 grid to the dot centres,
    assign each dot to a grid cell, force the dark finder/timing modules and
    decode the resulting matrix.

    Args:
        image_path: Path to the input image
        template_path: Path to the template image
        debug: When True, show the intermediate figures (detection overview,
            grid overlay, dot-to-grid assignment and the raw matrix)
        rotation: Optional angle in degrees by which the input image is
            rotated about its centre before processing; None skips the rotation
    
    Returns:
        Decoded data from the DMC matrix or None if decoding fails.

    Raises:
        FileNotFoundError: If the image or the template cannot be read.
    """
    # Must match the 16x16 default that generate_grid_affine and cost use for the fit.
    grid_size = 16

    # === Load image (grayscale) ===
    # cv2.imread returns None for an unreadable file instead of raising.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (320, 320))
    dot_template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    if dot_template is None:
        raise FileNotFoundError(f"Could not read template: {template_path}")
    dot_template = cv2.resize(dot_template, (19, 19))

    # === Rotation test ===
    if rotation is not None:
        center = (img.shape[1] // 2, img.shape[0] // 2)
        M = cv2.getRotationMatrix2D(center, rotation, 1)
        img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

    # === Apply Retinex ===
    reflectance, illumination = single_scale_retinex(img, sigma=64)

    # === Template matching ===
    result = cv2.matchTemplate(reflectance, dot_template, cv2.TM_CCOEFF_NORMED)
    threshold = 0.7
    above = result >= threshold
    # nonzero() and boolean indexing both walk the array in row-major order,
    # so boxes and scores stay aligned.
    match_rows, match_cols = np.nonzero(above)
    scores = result[above]

    # === Bounding boxes (x, y, w, h) for each match ===
    h, w = dot_template.shape
    boxes = [(int(x), int(y), w, h) for (x, y) in zip(match_cols, match_rows)]

    # === Apply NMS ===
    nms_boxes = non_max_suppression_fast(boxes, scores, overlap_thresh=0.3)

    if debug:
        # The template contours are only needed for the debug figure.
        dot_contours = contours_from_patch(dot_template)
        display_yucheng_methods(nms_boxes, reflectance, dot_contours, img, illumination, dot_template)

    # === Estimating Grid ===
    x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt, observed_pts = estimate_grid(nms_boxes)

    # === Show the grid points ===
    grid_pts, _ = generate_grid_affine(x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt, grid_size=grid_size)
    if debug:
        show_grid(img, grid_pts)

    # Invert each observed point to get (i, j)
    ij_estimates = np.array([invert_affine(p, x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt) for p in observed_pts])

    # Round to nearest integer to get index estimate
    ij_rounded = np.round(ij_estimates).astype(int)

    # Keep only valid indices (within grid bounds)
    valid_mask = np.all((ij_rounded >= 0) & (ij_rounded < grid_size), axis=1)
    ij_valid = ij_rounded[valid_mask]  # valid indices (what we use for decoding!!!)
    observed_valid = observed_pts[valid_mask]

    if debug:
        plot_grid_and_observed(grid_pts, observed_pts, ij_valid, observed_valid, x0_opt, y0_opt, a_opt, b_opt, c_opt, d_opt)

    # === Decode the Data Matrix ===
    # Mark each detected cell at [j, i]. This is the transpose of an [i, j]
    # fill, which is exactly what "flip vertically, then rotate 90 degrees
    # clockwise" produces.
    matrix = np.zeros((grid_size, grid_size), dtype=bool)
    matrix[ij_valid[:, 1], ij_valid[:, 0]] = True

    if debug:
        # Shows the matrix as detected, before the finder pattern is forced.
        display_DMC(matrix)

    # === Extra fix for finder pattern ===
    matrix[:, 0] = True      # left finder pattern
    matrix[-1, :] = True     # bottom finder pattern
    matrix[0, 0::2] = True   # top finder pattern (top even indices)
    matrix[1::2, -1] = True  # right finder pattern (right odd indices)

    # === Decoding DMC ===
    return decode_DMC(matrix)

In [None]:
# Run the whole pipeline on the sample image with the debug figures enabled.
# rotation=0 is not None, so the image still passes through the rotation step with a zero angle.
img_to_test = "../data/delete.jpg"
template_to_test = "../data/delete_template.jpg"
decode_pipeline(img_to_test, template_to_test, debug=True, rotation=0)