In [2]:
import torchvision
import xmltodict
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
import os
import zipfile

In [None]:
# Mount Google Drive at /content/drive so files stored there become reachable from this runtime.
# NOTE(review): `google.colab` is only importable inside a Colab runtime, and the dataset
# paths used later in this notebook are relative ('../data/...') rather than under
# /content/drive — confirm this mount is still required.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import xml.etree.ElementTree as ET
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

def collate_fn(batch):
    """Combine ``(image, boxes, labels)`` samples into one detection batch.

    Args:
        batch: Sequence of samples; each sample is indexable with the image at
            position 0, its boxes at position 1 and its labels at position 2.

    Returns:
        A tuple ``(images, boxes, labels)`` where ``images`` is the sample
        images stacked along a new leading dimension, and ``boxes`` / ``labels``
        are plain lists holding one entry per sample (left unstacked because
        the number of boxes can differ between images).
    """
    images, boxes, labels = [], [], []
    for sample in batch:
        images.append(sample[0])
        boxes.append(sample[1])
        labels.append(sample[2])

    return torch.stack(images, dim=0), boxes, labels

class LicensePlateDataset(Dataset):
    """Detection dataset whose ``.jpg`` images and ``.xml`` annotations share one folder.

    Images and annotations are paired by position in their sorted file lists,
    so every image is expected to have an annotation with the same base name.
    Annotations are XML files with ``object/bndbox/{xmin,ymin,xmax,ymax}``
    elements (a Pascal-VOC-like layout).

    Args:
        root: Directory containing both the images and the annotation files.
        transforms: Optional callable applied to the RGB PIL image. Because
            ``resize_image`` reads ``image.shape``, the transform has to
            produce a tensor (e.g. ``T.ToTensor()``).
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        filenames = os.listdir(root)
        self.imgs = sorted(f for f in filenames if f.endswith(".jpg"))
        self.labels = sorted(f for f in filenames if f.endswith(".xml"))

    @staticmethod
    def _parse_boxes(ann_path):
        """Read every ``object/bndbox`` of one annotation file into an (N, 4) float32 tensor."""
        boxes = []
        for obj in ET.parse(ann_path).getroot().findall('object'):
            bbox = obj.find('bndbox')
            # float() accepts both "12" and "12.0"; int() rejected the latter.
            boxes.append([float(bbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
        # reshape keeps an annotation without objects as (0, 4) instead of (0,),
        # so column indexing such as boxes[:, [0, 2]] stays valid downstream.
        return torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

    def __getitem__(self, idx):
        """Return ``(image, boxes, labels)`` for sample ``idx``.

        ``boxes`` is an (N, 4) float32 tensor rescaled by ``resize_image`` and
        ``labels`` is an int64 tensor of N ones (single foreground class).
        """
        img_path = os.path.join(self.root, self.imgs[idx])
        ann_path = os.path.join(self.root, self.labels[idx])

        # Force three channels so grayscale/RGBA files can be stacked with RGB
        # ones, and close the file handle as soon as the pixels are loaded.
        with Image.open(img_path) as handle:
            img = handle.convert("RGB")

        boxes = self._parse_boxes(ann_path)
        labels = torch.ones((len(boxes),), dtype=torch.int64)

        if self.transforms:
            img = self.transforms(img)

        img, boxes = resize_image(img, boxes)
        return img, boxes, labels

    def __len__(self):
        """Number of images found in ``root``."""
        return len(self.imgs)

transform = T.Compose([T.ToTensor()])

# Only the training split is shuffled; validation and test are iterated in a
# fixed order so repeated evaluations see the samples identically.
train_dataset = LicensePlateDataset('../data/lp_dataset/train',  transforms=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128, collate_fn=collate_fn)

val_dataset = LicensePlateDataset('../data/lp_dataset/valid',  transforms=transform)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=128, collate_fn=collate_fn)

test_dataset = LicensePlateDataset('../data/lp_dataset/test',  transforms=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128, collate_fn=collate_fn)

# Sanity check: the dataset pairs images and annotations by sorted position, so
# the i-th image and the i-th annotation must share a base name. splitext strips
# only the final extension (split('.jpg') would cut at the first '.jpg' anywhere
# in the name), and the explicit length check reports a missing file instead of
# raising IndexError part-way through the loop.
if len(train_dataset.imgs) != len(train_dataset.labels):
    print("ERROR")
for img_name, label_name in zip(train_dataset.imgs, train_dataset.labels):
    img_stem = os.path.splitext(img_name)[0]
    if img_stem != os.path.splitext(label_name)[0]:
        print(img_stem)
        print("ERROR")
    


In [31]:
import xml.etree.ElementTree as ET
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split

def collate_fn(batch):
    """Batch detection samples without forcing boxes/labels to a common size.

    Args:
        batch: Sequence of ``(image, boxes, labels)`` samples.

    Returns:
        ``(images, boxes, labels)``: the images stacked on a new first
        dimension, plus two lists carrying each sample's boxes and labels.
    """
    # Transpose the batch: one list per field (image, boxes, labels).
    images, boxes, labels = [[sample[field] for sample in batch] for field in range(3)]

    return torch.stack(images, dim=0), boxes, labels

class LicensePlateDataset(Dataset):
    """Detection dataset laid out as ``root/images`` and ``root/annotations``.

    Images and annotations are paired by position in their sorted directory
    listings, so both folders are expected to contain matching base names.
    Annotations are XML files with ``object/bndbox/{xmin,ymin,xmax,ymax}``
    elements (a Pascal-VOC-like layout).

    Args:
        root: Directory holding the ``images`` and ``annotations`` sub-folders.
        transforms: Optional callable applied to the RGB PIL image. Because
            ``resize_image`` reads ``image.shape``, the transform has to
            produce a tensor (e.g. ``T.ToTensor()``).
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.labels = sorted(os.listdir(os.path.join(root, "annotations")))

    @staticmethod
    def _parse_boxes(ann_path):
        """Read every ``object/bndbox`` of one annotation file into an (N, 4) float32 tensor."""
        boxes = []
        for obj in ET.parse(ann_path).getroot().findall('object'):
            bbox = obj.find('bndbox')
            # float() accepts both "12" and "12.0"; int() rejected the latter.
            boxes.append([float(bbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
        # reshape keeps an annotation without objects as (0, 4) instead of (0,),
        # so column indexing such as boxes[:, [0, 2]] stays valid downstream.
        return torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

    def __getitem__(self, idx):
        """Return ``(image, boxes, labels)`` for sample ``idx``.

        ``boxes`` is an (N, 4) float32 tensor rescaled by ``resize_image`` and
        ``labels`` is an int64 tensor of N ones (single foreground class).
        """
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        ann_path = os.path.join(self.root, "annotations", self.labels[idx])

        # Force three channels so grayscale/palette files can be stacked with
        # RGB ones, and close the file handle as soon as the pixels are loaded.
        with Image.open(img_path) as handle:
            img = handle.convert("RGB")

        boxes = self._parse_boxes(ann_path)
        labels = torch.ones((len(boxes),), dtype=torch.int64)

        if self.transforms:
            img = self.transforms(img)

        img, boxes = resize_image(img, boxes)
        # Kept as a guard for custom transforms that emit more than 3 channels.
        img = img[:3, :, :]
        return img, boxes, labels

    def __len__(self):
        """Number of files found in ``root/images``."""
        return len(self.imgs)

transform = T.Compose([T.ToTensor()])
dataset = LicensePlateDataset('../data/kaggle_larxel',  transforms=transform)
# collate_fn is required here as well: images can carry different numbers of
# boxes, which the default collation cannot stack into a single tensor.
data_loader = DataLoader(dataset, shuffle=True, batch_size=4, collate_fn=collate_fn)

# Get dataset indices
dataset_size = len(dataset)
indices = list(range(dataset_size))

# Split indices 80/10/10 into train, val, and test (fixed seed for reproducibility)
train_indices, val_test_indices = train_test_split(indices, test_size=0.2, random_state=42)
val_indices, test_indices = train_test_split(val_test_indices, test_size=0.5, random_state=42)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Only the training loader shuffles; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

    

In [23]:
def resize_image(image, boxes, size=(240, 240)):
    """Resize an image tensor and rescale its bounding boxes to match.

    Args:
        image: Tensor whose last two dimensions are (height, width).
        boxes: Boxes as ``[xmin, ymin, xmax, ymax]`` rows; anything that
            reshapes to (N, 4), including an empty tensor for "no objects".
        size: Target size forwarded to ``T.Resize``. Defaults to (240, 240).

    Returns:
        ``(resized_image, scaled_boxes)``. ``scaled_boxes`` is a new (N, 4)
        floating-point tensor; the ``boxes`` argument is left unmodified.
    """
    # Original image size
    orig_height, orig_width = image.shape[-2], image.shape[-1]

    # Resize image
    image = T.Resize(size, antialias=True)(image)

    # Scale factors are derived from the actual output shape, so they stay
    # correct whatever `size` made T.Resize produce.
    new_height, new_width = image.shape[-2], image.shape[-1]
    scale_x = new_width / orig_width
    scale_y = new_height / orig_height

    boxes = torch.as_tensor(boxes)
    if not boxes.is_floating_point():
        # Integer boxes cannot hold fractional scaled coordinates.
        boxes = boxes.to(torch.float32)
    # An empty annotation may arrive with shape (0,); give it the (0, 4) layout.
    boxes = boxes.reshape(-1, 4)

    # Out-of-place multiply: the caller's tensor must not be altered.
    scale = boxes.new_tensor([scale_x, scale_y, scale_x, scale_y])
    return image, boxes * scale



In [41]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch
import torch.utils.data
import torchvision.transforms as T
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

batch_size = 4

# Load the model and replace the box head with a 2-class predictor.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # 1 class (license plate) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move model to the appropriate device
device = 'cpu'
model.to(device)

# Construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Learning rate scheduler: multiply the LR by 0.1 every 3 scheduler steps (epochs)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training loop.
# Requires evaluate_model(model, data_loader, device) and
# plot_training_validation_accuracy, which are defined in a later cell of this
# notebook; run that cell first.
num_epochs = 1
i = 0  # global iteration counter across epochs
train_accuracies = []
val_accuracies = []
classification_losses = []
regression_losses = []
for epoch in range(num_epochs):
    for images, boxes, labels in train_loader:
        # Re-enter train mode every iteration: evaluate_model() below switches
        # the model to eval mode.
        model.train()
        targets = [
            {"boxes": box.to(device), "labels": label.to(device)}
            for box, label in zip(boxes, labels)
        ]
        images = images.to(device)
        loss_dict = model(images, targets)
        print(loss_dict)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Access individual losses
        classification_loss = loss_dict['loss_classifier']
        regression_loss = loss_dict['loss_box_reg']

        # Store losses
        classification_losses.append(float(classification_loss)/batch_size)
        regression_losses.append(float(regression_loss)/batch_size)

        if i % 10 == 0:
            train_accuracy = evaluate_model(model, train_loader, device)
            val_accuracy = evaluate_model(model, val_loader, device)

            train_accuracies.append(train_accuracy)
            val_accuracies.append(val_accuracy)

        i += 1

    # Step the scheduler once per epoch. Stepping it per batch (as before)
    # shrank the learning rate 10x every 3 iterations, effectively freezing
    # training after a handful of batches.
    lr_scheduler.step()

# Plot the classification and regression losses
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(classification_losses, label='Classification Loss')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Classification Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(regression_losses, label='Regression Loss')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Regression Loss')
plt.legend()

plt.tight_layout()
plt.show()


plot_training_validation_accuracy(train_accuracies, val_accuracies)

print("Training complete!")


{'loss_classifier': tensor(0.5156, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0.0474, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.1090, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.0048, grad_fn=<DivBackward0>)}


KeyboardInterrupt: 

In [29]:
def plot_training_validation_accuracy(train_acc, val_acc):
    """Show training and validation accuracy curves on one 10x5 figure.

    Args:
        train_acc: Sequence of training accuracy values, one per evaluation.
        val_acc: Sequence of validation accuracy values, one per evaluation.
    """
    _, ax = plt.subplots(figsize=(10, 5))
    curves = (
        (train_acc, 'Training Accuracy'),
        (val_acc, 'Validation Accuracy'),
    )
    for values, legend_name in curves:
        ax.plot(values, label=legend_name)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Accuracy')
    ax.legend()
    plt.show()

def evaluate_model(model, data_loader, device, iou_threshold=0.5, score_threshold=0.0):
    """Fraction of ground-truth boxes matched by at least one prediction.

    A ground-truth box counts as matched when some predicted box overlaps it
    with IoU above ``iou_threshold``, so the result always lies in [0, 1].

    Args:
        model: Detection model; called as ``model(list_of_images)`` and expected
            to return one dict per image with a ``'boxes'`` entry (and
            ``'scores'`` when ``score_threshold`` > 0).
        data_loader: Iterable yielding ``(images, boxes, labels)`` batches where
            ``boxes`` holds one tensor of ground-truth boxes per image.
        device: Device the images and ground-truth boxes are moved to.
        iou_threshold: Minimum IoU (exclusive) for a match. Defaults to 0.5.
        score_threshold: Predictions scoring below this are ignored. The
            default 0.0 keeps every prediction.

    Returns:
        matched / total ground-truth boxes, or 0 when there are none.

    Note:
        The model is left in eval mode.
    """
    model.eval()
    matched = 0
    total = 0

    with torch.no_grad():
        for images, boxes, labels in data_loader:
            images = [image.to(device) for image in images]
            outputs = model(images)

            for output, true_boxes in zip(outputs, boxes):
                true_boxes = true_boxes.to(device)
                if true_boxes.nelement() == 0:
                    continue

                # Count the ground truth before looking at predictions, so an
                # image with no detections lowers the score instead of
                # silently dropping out of the denominator.
                total += true_boxes.size(0)

                pred_boxes = output['boxes']
                if score_threshold > 0:
                    pred_boxes = pred_boxes[output['scores'] >= score_threshold]
                if pred_boxes.nelement() == 0:
                    continue

                # iou is (num_pred, num_true); reduce over predictions (dim=0)
                # to get the best overlap for each ground-truth box. Reducing
                # over dim=1 would count predictions and could exceed 100 %.
                iou = torchvision.ops.box_iou(pred_boxes, true_boxes)
                best_iou_per_truth, _ = iou.max(dim=0)
                matched += (best_iou_per_truth > iou_threshold).sum().item()

    return matched / total if total > 0 else 0

In [60]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def visualize_image_with_boxes(img, target, score_threshold=0.5):
    """Display an image with bounding boxes drawn as red rectangles.

    Args:
        img: Image tensor in (C, H, W) layout.
        target: Dict with a ``'boxes'`` entry of ``[xmin, ymin, xmax, ymax]``
            rows. When a ``'scores'`` entry is present (model output), only
            boxes scoring above ``score_threshold`` are drawn; without it
            (ground-truth targets) every box is drawn.
        score_threshold: Minimum score (exclusive) for a predicted box to be
            drawn. Defaults to 0.5.
    """
    fig, ax = plt.subplots(1)
    ax.imshow(img.permute(1, 2, 0).cpu().detach().numpy()) # Convert from (C, H, W) to (H, W, C) for matplotlib

    # Convert to plain Python floats up front: matplotlib should not be handed
    # tensors, which may live on another device or require grad.
    boxes = torch.as_tensor(target['boxes']).detach().cpu().reshape(-1, 4).tolist()
    scores = target.get('scores')
    if scores is not None:
        scores = torch.as_tensor(scores).detach().cpu().reshape(-1).tolist()

    for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
        if scores is not None and not scores[i] > score_threshold:
            continue
        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

    plt.show()


In [None]:
# Run the trained detector over the Cars_1999 images and show its predictions.
# LicensePlateDataset accepts only (root, transforms); the former `train=False`
# keyword raised TypeError before any image was loaded.
dataset = LicensePlateDataset('../data/Cars_1999', transforms=transform)
data_loader = DataLoader(dataset, shuffle=True, batch_size=1, collate_fn=collate_fn)
model.eval()
with torch.no_grad():
    for images, boxes, labels in data_loader:
        print(images.shape)
        # Ground truth in the same dict format as the predictions, kept for
        # comparing against `pred`.
        targets = [{"boxes": box, "labels": label} for box, label in zip(boxes, labels)]
        # Keep at most 3 channels and run on the device the model lives on.
        images = images[:, :3, :, :].to(device)
        pred = model(images)

        visualize_image_with_boxes(images[0], pred[0])


In [None]:
def calculate_iou(box1, box2):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    Args:
        box1: ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``.
        box2: Second box in the same format.

    Returns:
        Intersection area divided by union area. Returns 0.0 when the boxes do
        not overlap or when the union has no area (degenerate boxes), instead
        of dividing by zero.
    """
    x1, y1, x2, y2 = box1
    x1_p, y1_p, x2_p, y2_p = box2

    # Corners of the intersection rectangle
    x_left = max(x1, x1_p)
    y_top = max(y1, y1_p)
    x_right = min(x2, x2_p)
    y_bottom = min(y2, y2_p)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # Calculate the areas of each bounding box
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2_p - x1_p) * (y2_p - y1_p)

    union_area = box1_area + box2_area - intersection_area
    if union_area <= 0:
        # Two zero-area boxes that touch pass the overlap check above but have
        # an empty union; report no overlap rather than raising.
        return 0.0

    # Calculate the IoU
    return intersection_area / float(union_area)


In [83]:
def evaluate_model(predictions, targets, score_threshold=0.5):
    """
    Evaluate predictions against targets and return the average pairwise IoU.

    For every image, each prediction that passes the score threshold is
    compared with each target box; the IoUs of all such pairs, over all
    images, are averaged.

    NOTE: this shares its name with the loader-based
    ``evaluate_model(model, data_loader, device)`` defined in an earlier cell;
    running this cell rebinds the name to this prediction-based version.

    Parameters:
    predictions (list): List of predicted dictionaries with keys 'boxes' and 'scores'
    targets (list): List of target dictionaries with key 'boxes'
    score_threshold (float): Confidence score threshold to filter predictions

    Returns:
    float: Average IoU, or 0.0 when no (prediction, target) pair exists
    """
    ious = []

    for prediction, target in zip(predictions, targets):
        pred_boxes = torch.as_tensor(prediction['boxes'], dtype=torch.float32).reshape(-1, 4)
        pred_scores = torch.as_tensor(prediction['scores'], dtype=torch.float32).reshape(-1)
        target_boxes = torch.as_tensor(target['boxes'], dtype=torch.float32).reshape(-1, 4)
        target_boxes = target_boxes.to(pred_boxes.device)

        # Filter predictions by score threshold
        kept_boxes = pred_boxes[pred_scores >= score_threshold]
        if kept_boxes.numel() == 0 or target_boxes.numel() == 0:
            continue

        # box_iou expects two (N, 4) / (M, 4) tensors and returns the full
        # (N, M) matrix, i.e. exactly the prediction x target pairs wanted.
        pair_ious = torchvision.ops.box_iou(kept_boxes, target_boxes)
        ious.extend(pair_ious.flatten().tolist())

    if len(ious) == 0:
        return 0.0

    return sum(ious) / len(ious)

