In [1]:
import cv2
import torch

# 1. Detect card number area with YOLO
def detect_card_number_area(yolo_model, image):
    # Run YOLO inference
    results = yolo_model(image)
    bounding_boxes = results.xyxy[0]  # Extract bounding boxes
    # Assume one bounding box containing the card number
    x1, y1, x2, y2 = map(int, bounding_boxes[0][:4])
    card_number_area = image[y1:y2, x1:x2]
    return card_number_area

# 2. Segment digits in the detected card number area
def segment_digits(card_number_area):
    # Convert to grayscale
    gray = cv2.cvtColor(card_number_area, cv2.COLOR_BGR2GRAY)
    
    # Apply binary thresholding
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    
    # Find contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Sort contours from left to right based on x-coordinate
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
    
    digit_images = []
    for ctr in contours:
        # Get bounding box for each contour
        x, y, w, h = cv2.boundingRect(ctr)
        
        # Filter contours that may not be digits based on size (you can adjust these thresholds)
        if h > 10 and w > 5:  # Threshold values to ignore noise
            # Crop the digit from the image using the bounding box
            digit = gray[y:y+h, x:x+w]
            
            # Resize the digit to the CNN input size, e.g., 224x224
            digit = cv2.resize(digit, (224, 224))
            
            # Add the preprocessed digit to the list
            digit_images.append(digit)
    
    return digit_images

# 3. Recognize digits using CNN
def recognize_digits(cnn_model, digit_images):
    cnn_model.eval()
    card_number = ""
    for digit_image in digit_images:
        # Preprocess each digit image (resize, normalize, etc.)
        digit_image = preprocess_digit_image(digit_image)
        # Convert to torch tensor and add batch dimension
        digit_image = torch.tensor(digit_image).unsqueeze(0).to(device)
        # Predict digit
        with torch.no_grad():
            output = cnn_model(digit_image)
            _, predicted_digit = torch.max(output, 1)
            card_number += str(predicted_digit.item())
    return card_number


In [2]:
import torch.nn as nn
import torch.nn.functional as F

class DigitCNN(nn.Module):
    def __init__(self):
        super(DigitCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=2),
            nn.BatchNorm2d(32, momentum=0.1)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=2),
            nn.BatchNorm2d(64, momentum=0.1)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=2),
            nn.BatchNorm2d(128, momentum=0.1)
        )
        self.fc1 = nn.Linear(128 * 29 * 29, 128)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for digits 0-9
        self

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 128 * 29 * 29)
        x = F.relu(self.dropout(self.fc1(x)))
        x = self.fc2(x)
        return x


In [3]:
raw_card_image = cv2.imread("APS360_Project_Dataset/dataset5/img/img_1.png")
yolo_model = torch.hub.load('yolov5', 'custom', path="yolov5/yolov5s.pt", source='local')
card_number_area = detect_card_number_area(yolo_model, raw_card_image)
digit_images = segment_digits(card_number_area)
cnn_model = DigitCNN()
cnn_model.load_state_dict(torch.load("CNN_bs128_lr0.001_SGD_94valacc.pth", map_location=torch.device('cpu')))
card_number = recognize_digits(cnn_model, digit_images)
print("Detected Card Number:", card_number)

YOLOv5  2024-11-3 Python-3.12.3 torch-2.3.1 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


IndexError: index 0 is out of bounds for dimension 0 with size 0