# Loading data

In [3]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

In [4]:
# paths for the dataset folders and label files
dataset1_path = 'APS360_Project_Dataset/dataset1/bank_card_images_train'
train_labels_path = 'APS360_Project_Dataset/dataset1/bank_card_images_train_labels.csv'

# Load the label CSV files
dataset1_card_labels = pd.read_csv(train_labels_path)
dataset1_card_labels = dataset1_card_labels[['index', 'card_num']]

print(dataset1_card_labels.head())

def load_images_from_dataset1(base_path, labels_df):
    images = []
    labels = []
    
    labels_df['index'] = labels_df['index'].astype(str)
    
    for index, row in labels_df.iterrows():
        img_file = os.path.join(base_path, row['index'])
        image = cv2.imread(img_file)
        
        if image is not None:
            images.append(image)
            labels.append(row['card_num'])  # card_num is the label
        else:
            print(f"Warning: Failed to load image {img_file}")
    
    return images, labels 

dataset1_images, dataset1_labels = load_images_from_dataset1(dataset1_path, dataset1_card_labels)

print(f"Loaded {len(dataset1_images)} images from dataset1.")
print(f"First few labels: {dataset1_labels[:5]}")


   index    card_num
0  0.jpg  62257583.0
1  1.jpg  62257583.0
2  2.jpg  62257583.0
3  3.jpg  62257583.0
4  4.jpg  62257583.0


[ WARN:0@17.654] global loadsave.cpp:241 findDecoder imread_('APS360_Project_Dataset/dataset1/bank_card_images_train/nan'): can't open/read file: check file path/integrity


Loaded 618 images from dataset1.
First few labels: [62257583.0, 62257583.0, 62257583.0, 62257583.0, 62257583.0]


[ WARN:0@17.888] global loadsave.cpp:241 findDecoder imread_('APS360_Project_Dataset/dataset1/bank_card_images_train/nan'): can't open/read file: check file path/integrity


In [None]:
# for yolo object detection
dataset2_images_path = 'APS360_Project_Dataset/dataset2/JPEGImages'
dataset2_annotations_path = 'APS360_Project_Dataset/dataset2/Annotations'

def parse_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    filename = root.find('filename').text
    objects = []
    
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        objects.append({
            'bbox': [xmin, ymin, xmax, ymax]
        })
    
    return filename, objects

def load_images_for_dataset2(image_folder, annotation_folder):
    images = []
    annotations = []
    
    for xml_file in os.listdir(annotation_folder):
        if xml_file.endswith('.xml'):
            xml_path = os.path.join(annotation_folder, xml_file)
            filename, objects = parse_annotation(xml_path)
            img_path = os.path.join(image_folder, filename)
            image = cv2.imread(img_path)
            
            if image is not None:
                images.append(image)
                annotations.append(objects)
            else:
                print(f"Warning: Failed to load image {img_path}")
    
    return images, annotations

dataset2_images, dataset2_annotations = load_images_for_dataset2(dataset2_images_path, dataset2_annotations_path)

# Print the number of images and annotations loaded
print(f"Loaded {len(dataset2_images)} images and their corresponding annotations.")
print(f"First few annotations: {dataset2_annotations[:5]}")

In [None]:
# for yolo object detection
dataset3_path = 'APS360_Project_Dataset/dataset3_preprocessed'

In [27]:
dataset4_path = 'APS360_Project_Dataset/dataset4'

def load_images_for_dataset4(base_path):
    images = []
    labels = []
    
    for label in os.listdir(base_path):
        folder_path = os.path.join(base_path, label)
        
        if os.path.isdir(folder_path):
            for img_file in os.listdir(folder_path):
                img_path = os.path.join(folder_path, img_file)
                image = cv2.imread(img_path)
                
                if image is not None:
                    images.append(image) 
                    labels.append(int(label))  # Folder name is the label (0-9)
                else:
                    print(f"Warning: Failed to load image {img_path}")
    
    return images, labels

dataset4_images, dataset4_labels = load_images_for_dataset4(dataset4_path)

print(f"Loaded {len(dataset4_images)} images from dataset4.")
print(f"First few labels: {dataset4_labels[:5]}")

Loaded 885 images from dataset4.
First few labels: [0, 0, 0, 0, 0]


# Data Preprocessing

In [31]:
# resize, convert to grey scale, normalize
def preprocess_image(image):
    image = cv2.resize(image, (224, 224))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image.astype('float32') / 255.0
    
    return image

In [35]:
def preprocess_images(images):
    preprocessed_images = [preprocess_image(img) for img in images]
    return np.array(preprocessed_images)
preprocessed_dataset1_images = preprocess_images(dataset1_images)
preprocessed_dataset4_images = preprocess_images(dataset4_images)

In [34]:
def preprocess_images_and_adjust_bboxes(images, annotations):
    preprocessed_images = []
    preprocessed_annotations = []
    
    for i, img in enumerate(images):
        original_height, original_width = img.shape[:2]
        img_preprocessed = preprocess_image(img)
        
        # Adjust bounding boxes for new image size
        scale_x = 224 / original_width
        scale_y = 224 / original_height
        
        adjusted_bboxes = []
        for obj in annotations[i]:
            bbox = obj['bbox']
            adjusted_bbox = [
                int(bbox[0] * scale_x),
                int(bbox[1] * scale_y),
                int(bbox[2] * scale_x),
                int(bbox[3] * scale_y)
            ]
            adjusted_bboxes.append({'bbox': adjusted_bbox})
        
        preprocessed_images.append(img_preprocessed)
        preprocessed_annotations.append(adjusted_bboxes)
    
    return np.array(preprocessed_images), preprocessed_annotations

preprocessed_dataset2_images, preprocessed_dataset2_annotations = preprocess_images_and_adjust_bboxes(dataset2_images, dataset2_annotations)

# Data Augmentation

In [44]:
import torch
import torchvision.transforms as transforms
import random
from PIL import Image

def augment_images(images, labels, target_count):
    augment_transform = transforms.Compose([
        transforms.RandomRotation(degrees=15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomResizedCrop(size=(224,224), scale=(0.9, 1.1)),
        transforms.ToTensor()
    ])
    
    augmented_images = []
    augmented_labels = []
    
    unique_labels, counts = np.unique(labels, return_counts=True)
    label_counts = dict(zip(unique_labels, counts))
    
    for label in unique_labels:
        label_images = [img for img, lbl in zip(images, labels) if lbl == label]
        current_count = label_counts[label]
        required_count = target_count.get(label, current_count)
        
        if current_count < required_count:
            images_to_augment = random.choices(label_images, k=required_count - current_count)
            for img in images_to_augment:
                img_pil = Image.fromarray(img)  
                augmented_img = augment_transform(img_pil)
                aug_img_np = np.array(augmented_img.permute(1, 2, 0)) 
                aug_img_np = cv2.resize(aug_img_np, (224,224))  
                augmented_images.append(aug_img_np)
                augmented_labels.append(label)
    
    return augmented_images, augmented_labels

In [45]:
target_count = {
    0: 150,
    1: 150,
    2: 150,
    3: 150,
    4: 150,
    5: 150,
    6: 150,
    7: 150,
    8: 150,
    9: 150
}

augmented_images, augmented_labels = augment_images(preprocessed_dataset4_images, dataset4_labels, target_count)

balanced_dataset4_images = np.concatenate((preprocessed_dataset4_images, augmented_images), axis=0)
balanced_dataset4_labels = np.concatenate((dataset4_labels, augmented_labels), axis=0)

print(f"Original dataset size: {len(dataset4_labels)}")
print(f"New dataset size after augmentation: {len(balanced_dataset4_labels)}")

Original dataset size: 885
New dataset size after augmentation: 1500
