In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [2]:
# Step 2: Mount Google Drive if using files from there
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Set your dataset path
base_path = '/content/drive/MyDrive/Colab Notebooks/archive (28)'
images_path = os.path.join(base_path, 'images')
train_csv_path = os.path.join(base_path, 'train.csv')
image_ids_csv_path = os.path.join(base_path, 'image_ids.csv')

In [4]:
train_df = pd.read_csv(train_csv_path)
image_ids_df = pd.read_csv(image_ids_csv_path)

# Merge to get filenames
merged_df = pd.merge(train_df, image_ids_df, left_on='image_id', right_on='id')
merged_df['file_path'] = merged_df['file_name'].apply(lambda x: os.path.join(images_path, x))

# Normalize bounding boxes (x_center, y_center, width, height)
merged_df['x_center'] = merged_df['bbox'].apply(lambda x: eval(x)[0] + eval(x)[2] / 2)
merged_df['y_center'] = merged_df['bbox'].apply(lambda x: eval(x)[1] + eval(x)[3] / 2)
merged_df['width'] = merged_df['bbox'].apply(lambda x: eval(x)[2])
merged_df['height'] = merged_df['bbox'].apply(lambda x: eval(x)[3])

In [5]:
train_df, val_df = train_test_split(merged_df, test_size=0.2, random_state=42)

In [6]:
class SignatureDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        image = Image.open(row['file_path']).convert('RGB')
        bbox = torch.tensor([row['x_center'], row['y_center'], row['width'], row['height']], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)
        return image, bbox


In [7]:
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

In [8]:
train_dataset = SignatureDataset(train_df, transform=train_transform)
val_dataset = SignatureDataset(val_df, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)

In [9]:
import torch.nn as nn
import torch

class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, 4)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

# Set device and prepare model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ImprovedCNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [10]:
num_epochs = 10  # increase if needed

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for images, bboxes in train_loader:
        images, bboxes = images.to(device), bboxes.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, bboxes)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Training Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Training Loss: 0.0310
Epoch 2, Training Loss: 0.0293
Epoch 3, Training Loss: 0.0285
Epoch 4, Training Loss: 0.0282
Epoch 5, Training Loss: 0.0281
Epoch 6, Training Loss: 0.0278
Epoch 7, Training Loss: 0.0276
Epoch 8, Training Loss: 0.0272
Epoch 9, Training Loss: 0.0271
Epoch 10, Training Loss: 0.0267


In [11]:
model_path = '/content/drive/My Drive/Colab Notebooks/archive (28)/signature_impcnn_model.pth'
torch.save(model.state_dict(), model_path)
