In [1]:
import cv2
import numpy as np
from typing import List, Tuple, Optional
import matplotlib.pyplot as plt

def generate_sliding_windows(image_shape: Tuple[int, int],
                           window_sizes: List[Tuple[int, int]],
                           step_size: int = 16) -> List[Tuple[int, int, int, int]]:
    """
    Enumerate every window position that fits entirely inside an image.

    Windows are produced size by size (in the order given), and for each
    size row by row: the vertical offset advances in the outer loop and the
    horizontal offset in the inner loop.

    Args:
        image_shape: (height, width) of the image
        window_sizes: List of (height, width) tuples, one per window size
        step_size: Stride, in pixels, between consecutive window origins

    Returns:
        List of bounding boxes as (x, y, width, height) tuples. A window
        size larger than the image contributes no boxes.
    """
    img_h, img_w = image_shape

    # The "+ 1" lets the last window sit flush against the bottom/right edge.
    return [
        (left, top, box_w, box_h)
        for box_h, box_w in window_sizes
        for top in range(0, img_h - box_h + 1, step_size)
        for left in range(0, img_w - box_w + 1, step_size)
    ]

def extract_hog_features(image: np.ndarray,
                        orientations: int = 9,
                        pixels_per_cell: Tuple[int, int] = (8, 8),
                        cells_per_block: Tuple[int, int] = (2, 2)) -> np.ndarray:
    """
    Compute a simplified, HOG-like orientation histogram for an image.

    Unlike a full HOG descriptor, this builds a single histogram over the
    whole image: every pixel votes for the bin of its (unsigned) gradient
    orientation, weighted by its gradient magnitude.

    Args:
        image: Input image (grayscale)
        orientations: Number of orientation bins covering [0, 180) degrees
        pixels_per_cell: Accepted for signature compatibility; not used by
            this simplified implementation
        cells_per_block: Accepted for signature compatibility; not used by
            this simplified implementation

    Returns:
        Feature vector of length `orientations`. Entries sum to 1 unless the
        image has no gradient at all, in which case they are all zero.
    """
    # First derivatives in x and y; ksize=1 selects OpenCV's unsmoothed
    # derivative kernel.
    gx = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=1)
    gy = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=1)

    magnitude = np.sqrt(gx**2 + gy**2)

    # Fold signed directions onto unsigned orientations.
    angle = np.arctan2(gy, gx) * 180 / np.pi
    angle[angle < 0] += 180
    # arctan2 yields exactly 180 degrees for gx < 0, gy == 0 (and a tiny
    # negative angle can round up to 180 after the shift above). Every bin is
    # half-open, so without this wrap those pixels would match no bin and
    # their magnitude would be silently dropped. 180 degrees is the same
    # orientation as 0 degrees.
    angle[angle >= 180] -= 180

    hist = np.zeros(orientations)
    bin_width = 180 / orientations

    for i in range(orientations):
        mask = (angle >= i * bin_width) & (angle < (i + 1) * bin_width)
        hist[i] = np.sum(magnitude[mask])

    # Normalize to unit sum; leave an all-zero histogram untouched to avoid
    # dividing by zero on flat images.
    total = np.sum(hist)
    if total > 0:
        hist = hist / total

    return hist

def extract_window_features(image: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """
    Build the feature vector describing one rectangular region of an image.

    The region is cropped, converted to grayscale when it has a channel
    axis, and resized to 64x64 so that windows of any size yield comparable
    features.

    Args:
        image: Input image
        bbox: Bounding box as (x, y, width, height)

    Returns:
        Feature vector: the orientation histogram from extract_hog_features
        followed by mean intensity, intensity standard deviation and edge
        density of the resized window
    """
    left, top, box_w, box_h = bbox
    patch = image[top:top + box_h, left:left + box_w]

    # Three dimensions means a channel axis is present; collapse it.
    if patch.ndim == 3:
        patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)

    # Fixed size so features do not depend on the window dimensions.
    patch = cv2.resize(patch, (64, 64))

    gradient_hist = extract_hog_features(patch)

    # Fraction of pixels that Canny marks as edges.
    edge_map = cv2.Canny(patch, 50, 150)
    pixel_count = patch.shape[0] * patch.shape[1]

    # NOTE(review): mean and std are on the raw intensity scale while the
    # histogram entries sum to at most 1, so these scalars may dominate any
    # norm-based comparison of feature vectors - confirm this is intended.
    scalar_stats = [
        np.mean(patch),
        np.std(patch),
        np.sum(edge_map > 0) / pixel_count,
    ]

    return np.concatenate([gradient_hist, scalar_stats])

def simple_classifier(features: np.ndarray, template_features: np.ndarray, threshold: float = 0.7) -> float:
    """
    Score a window against a template using cosine similarity.

    Args:
        features: Feature vector of current window
        template_features: Feature vector of the template/target object
        threshold: Accepted for signature compatibility; this function only
            returns the score and does not apply the threshold itself

    Returns:
        Similarity score as a float in [0.0, 1.0]. Negative cosine values are
        clamped to 0.0; a zero-length input vector yields 0.0.
    """
    # Compute each norm once; a zero vector is left as-is so it cannot cause
    # a division by zero (its dot product is 0 anyway).
    features_norm = np.linalg.norm(features)
    template_norm = np.linalg.norm(template_features)

    if features_norm > 0:
        features = features / features_norm
    if template_norm > 0:
        template_features = template_features / template_norm

    similarity = float(np.dot(features, template_features))

    # Clamp on both sides: below so opposing vectors score 0, above because
    # floating-point rounding can push the cosine of parallel vectors
    # marginally past 1.
    return min(1.0, max(0.0, similarity))

def non_maximum_suppression(boxes: List[Tuple[int, int, int, int, float]],
                          overlap_threshold: float = 0.3) -> List[Tuple[int, int, int, int, float]]:
    """
    Greedily discard detections that overlap a higher-scoring detection.

    Detections are visited from highest to lowest score; one is retained
    only if its IoU with every already-retained detection is below the
    threshold.

    Args:
        boxes: List of (x, y, width, height, score) tuples
        overlap_threshold: IoU at or above which the lower-scoring box is
            suppressed

    Returns:
        Retained detections, ordered by descending score
    """
    if len(boxes) == 0:
        return []

    ranked = sorted(boxes, key=lambda det: det[4], reverse=True)

    survivors = []
    for candidate in ranked:
        # A candidate survives only if no stronger survivor overlaps it
        # too much.
        clear_of_all = all(
            calculate_iou(kept[:4], candidate[:4]) < overlap_threshold
            for kept in survivors
        )
        if clear_of_all:
            survivors.append(candidate)

    return survivors

def calculate_iou(box1: Tuple[int, int, int, int],
                  box2: Tuple[int, int, int, int]) -> float:
    """
    Compute the Intersection over Union (IoU) of two axis-aligned boxes.

    Args:
        box1, box2: Bounding boxes as (x, y, width, height)

    Returns:
        Intersection area divided by union area; 0.0 when the boxes do not
        overlap or when the union area is zero
    """
    ax, ay, aw, ah = box1
    bx, by, bw, bh = box2

    # Signed extent of the overlap along each axis; a negative value means
    # the boxes are separated along that axis.
    overlap_w = min(ax + aw, bx + bw) - max(ax, bx)
    overlap_h = min(ay + ah, by + bh) - max(ay, by)
    if overlap_w < 0 or overlap_h < 0:
        return 0.0

    intersection = overlap_w * overlap_h
    union = aw * ah + bw * bh - intersection

    # Guard against degenerate (zero-area) boxes.
    return intersection / union if union != 0 else 0.0

def detect_objects(image: np.ndarray,
                  template_image: np.ndarray,
                  window_sizes: Optional[List[Tuple[int, int]]] = None,
                  step_size: int = 16,
                  similarity_threshold: float = 0.6,
                  nms_threshold: float = 0.3) -> List[Tuple[int, int, int, int, float]]:
    """
    Search an image for regions resembling a template via sliding windows.

    Each candidate window is described by extract_window_features, scored
    against the template with simple_classifier, and kept when the score is
    strictly above the threshold. Overlapping hits are then pruned with
    non-maximum suppression.

    Args:
        image: Input image to search in
        template_image: Template image of the object to detect
        window_sizes: List of (height, width) window sizes to use; when
            None, the template size at scales 1.0, 1.2 and 0.8 is used
        step_size: Step size for sliding windows
        similarity_threshold: Minimum similarity score for detection
        nms_threshold: IoU threshold for non-maximum suppression

    Returns:
        List of detections as (x, y, width, height, score) tuples
    """
    if window_sizes is None:
        tmpl_h, tmpl_w = template_image.shape[:2]
        window_sizes = [
            (tmpl_h, tmpl_w),
            (int(tmpl_h * 1.2), int(tmpl_w * 1.2)),
            (int(tmpl_h * 0.8), int(tmpl_w * 0.8)),
        ]

    # The template is described over its full extent.
    full_template_box = (0, 0, template_image.shape[1], template_image.shape[0])
    reference = extract_window_features(template_image, full_template_box)

    img_h, img_w = image.shape[:2]
    hits = []

    for candidate in generate_sliding_windows(image.shape[:2], window_sizes, step_size):
        left, top, box_w, box_h = candidate

        # Defensive guard: only score windows lying fully inside the image.
        inside = left + box_w <= img_w and top + box_h <= img_h
        if not inside:
            continue

        score = simple_classifier(extract_window_features(image, candidate), reference)
        if score > similarity_threshold:
            hits.append((left, top, box_w, box_h, score))

    return non_maximum_suppression(hits, nms_threshold)

def draw_detections(image: np.ndarray,
                   detections: List[Tuple[int, int, int, int, float]],
                   color: Tuple[int, int, int] = (0, 255, 0),
                   thickness: int = 2) -> np.ndarray:
    """
    Draw bounding boxes and their scores on a copy of the image.

    The score label is placed above the box when there is room; for boxes
    too close to the top edge it is placed just inside the box instead, so
    the label is never drawn outside the image.

    Args:
        image: Input image (left unmodified)
        detections: List of detections as (x, y, width, height, score)
        color: BGR color for bounding boxes
        thickness: Line thickness

    Returns:
        Image with drawn bounding boxes
    """
    result_image = image.copy()

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    text_thickness = 1
    label_gap = 10  # vertical gap between the box's top edge and the label

    for x, y, w, h, score in detections:
        cv2.rectangle(result_image, (x, y), (x + w, y + h), color, thickness)

        score_text = f"{score:.2f}"

        # putText anchors the text at its bottom-left corner, so the glyphs
        # extend text_height pixels above label_y. If that would cross the
        # top of the image, move the label below the box's top edge.
        (_, text_height), _ = cv2.getTextSize(score_text, font, font_scale, text_thickness)
        label_y = y - label_gap
        if label_y < text_height:
            label_y = y + text_height + label_gap

        cv2.putText(result_image, score_text, (x, label_y),
                   font, font_scale, color, text_thickness)

    return result_image

# Example usage and demonstration
def demo_object_detection():
    """
    Run the detector end to end on a synthetic scene and print the results.

    The scene is random noise with three filled squares painted on it; the
    template is a single filled square.

    Returns:
        Tuple of (scene image, template image, annotated scene, detections)
    """
    # Random-noise background (not seeded, so each run differs).
    scene = np.random.randint(0, 255, (300, 400, 3), dtype=np.uint8)

    # Filled squares to be found: (top-left corner, bottom-right corner, color).
    targets = [
        ((50, 50), (120, 120), (255, 0, 0)),
        ((200, 150), (270, 220), (0, 0, 255)),
        ((300, 80), (370, 150), (255, 255, 0)),
    ]
    for corner_a, corner_b, fill in targets:
        cv2.rectangle(scene, corner_a, corner_b, fill, -1)

    # Template: a uniformly filled patch matching the first square's color.
    patch = np.zeros((70, 70, 3), dtype=np.uint8)
    cv2.rectangle(patch, (0, 0), (69, 69), (255, 0, 0), -1)

    print("Running object detection...")

    search_options = {
        "window_sizes": [(70, 70), (80, 80), (60, 60)],
        "step_size": 10,
        "similarity_threshold": 0.3,
        "nms_threshold": 0.3,
    }
    detections = detect_objects(scene, patch, **search_options)

    print(f"Found {len(detections)} detections:")
    for i, (x, y, w, h, score) in enumerate(detections):
        print(f"  Detection {i+1}: bbox=({x}, {y}, {w}, {h}), score={score:.3f}")

    annotated = draw_detections(scene, detections)

    return scene, patch, annotated, detections

if __name__ == "__main__":
    # Entry point: runs the synthetic demo only when this file is executed
    # directly, not when it is imported. The four results are bound
    # at module level so they remain available for inspection afterwards.
    original, template, result, detections = demo_object_detection()

    # Print a summary; the images themselves are returned but not displayed.
    print(f"\nDetection complete! Found {len(detections)} objects.")
    print("Use matplotlib or cv2.imshow() to visualize the results.")

Running object detection...
Found 69 detections:
  Detection 1: bbox=(50, 50, 70, 70), score=1.000
  Detection 2: bbox=(300, 80, 70, 70), score=1.000
  Detection 3: bbox=(200, 150, 70, 70), score=1.000
  Detection 4: bbox=(310, 50, 60, 60), score=0.976
  Detection 5: bbox=(340, 90, 60, 60), score=0.976
  Detection 6: bbox=(310, 120, 60, 60), score=0.976
  Detection 7: bbox=(270, 80, 60, 60), score=0.974
  Detection 8: bbox=(280, 110, 60, 60), score=0.972
  Detection 9: bbox=(190, 60, 80, 80), score=0.971
  Detection 10: bbox=(290, 0, 80, 80), score=0.971
  Detection 11: bbox=(160, 10, 80, 80), score=0.970
  Detection 12: bbox=(260, 10, 60, 60), score=0.970
  Detection 13: bbox=(140, 50, 80, 80), score=0.970
  Detection 14: bbox=(40, 130, 80, 80), score=0.970
  Detection 15: bbox=(200, 0, 80, 80), score=0.970
  Detection 16: bbox=(10, 170, 80, 80), score=0.969
  Detection 17: bbox=(280, 170, 60, 60), score=0.969
  Detection 18: bbox=(130, 110, 70, 70), score=0.969
  Detection 19: bbox=(