In [1]:
import os
import glob
import torch.nn as nn
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt

class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for label, folder_name in enumerate(sorted(os.listdir(root_dir))):
            folder_path = os.path.join(root_dir, folder_name)
            for image_path in glob.glob(os.path.join(folder_path, "*.jpg")):  # Assuming images are in PNG format
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('L')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        return image, label
    
    def visualize_samples(self, num_samples=5):
        """Visualize a few samples from each label."""
        fig, axes = plt.subplots(35, 5, figsize=(100, 100))
        fig.suptitle('Dataset Samples', fontsize=10)

        for label in range(len(set(self.labels))):
            label_indices = [i for i, l in enumerate(self.labels) if l == label]
            sampled_indices = np.random.choice(label_indices, num_samples, replace=False)

            for i, idx in enumerate(sampled_indices):
                image = Image.open(self.image_paths[idx]).convert('L')
                ax = axes[label, i]
                ax.imshow(image, cmap='gray')
                ax.axis('off')
                ax.set_title(f'Label: {label}')

        plt.tight_layout()
        plt.show()

# Directory containing the dataset
dataset_dir = r'D:\data_cpv\archive\CNN letter Dataset'

# Define transformations
transform = transforms.Compose([
    transforms.Resize((32, 128)),  # Resize images to a fixed size
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize images
])

# Create dataset and data loader
train_dataset = CustomDataset(dataset_dir, transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# train_dataset.visualize_samples()


In [2]:
class CRNN(nn.Module):
    def __init__(self, imgH, nc, nclass, nh):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module(f'conv{i}', nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module(f'batchnorm{i}', nn.BatchNorm2d(nOut))
            cnn.add_module(f'relu{i}', nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling0', nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling1', nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling2', nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling3', nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        self.rnn = nn.Sequential(
            nn.Linear(512, nh),
            nn.ReLU(),
            nn.Linear(nh, nclass)
        )

    def forward(self, x):
        conv = self.cnn(x)
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]
        output = self.rnn(conv)
        return output

In [3]:
import torch.optim as optim

# Initialize the model, loss function, and optimizer
num_classes = 36  # 0-9 and A-Z
model = CRNN(imgH=32, nc=1, nclass=num_classes, nh=256)
model.load_state_dict(torch.load(r"D:\data_cpv\char_trained_models\character_recognition_crnn_epoch_0.pth"))
criterion = nn.CTCLoss()
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.0001,momentum= 0.9, weight_decay=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

CRNN(
  (cnn): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): ReLU(inplace=True)
    (pooling0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pooling2): MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
    (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.

In [37]:


# Training loop
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    i = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = torch.tensor(labels, dtype=torch.int32).to(device)
        
        optimizer.zero_grad()
        outputs = model(images)

        # Convert labels and outputs for CTCLoss
        input_lengths = torch.full((outputs.size(1),), outputs.size(0), dtype=torch.int32).to(device)
        target_lengths = torch.full((labels.size(0),), 1, dtype=torch.int32).to(device)

        loss = criterion(outputs, labels, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        i += 1
        if i % 10 == 0:
            print(f"Epoch [{epoch}/{num_epochs}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}")

    lr_scheduler.step()
    if epoch%10 == 0 : torch.save(model.state_dict(), r"char_trained_models\character_recognition_crnn_epoch_{}.pth".format(epoch))
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

print("Training complete.")
torch.save(model.state_dict(), r'char_trained_models\character_recognition_crnn.pth')


  labels = torch.tensor(labels, dtype=torch.int32).to(device)


Epoch [0/200], Step [10/555], Loss: -2.2789
Epoch [0/200], Step [20/555], Loss: 5.4856
Epoch [0/200], Step [30/555], Loss: -0.4637
Epoch [0/200], Step [40/555], Loss: 1.3195
Epoch [0/200], Step [50/555], Loss: 3.6721
Epoch [0/200], Step [60/555], Loss: -0.8313
Epoch [0/200], Step [70/555], Loss: 0.6905
Epoch [0/200], Step [80/555], Loss: 2.4239
Epoch [0/200], Step [90/555], Loss: -0.1066
Epoch [0/200], Step [100/555], Loss: 6.2839
Epoch [0/200], Step [110/555], Loss: 0.1525
Epoch [0/200], Step [120/555], Loss: 2.5464
Epoch [0/200], Step [130/555], Loss: 0.7851
Epoch [0/200], Step [140/555], Loss: 0.8341
Epoch [0/200], Step [150/555], Loss: 3.6011
Epoch [0/200], Step [160/555], Loss: 3.8572
Epoch [0/200], Step [170/555], Loss: -0.7245
Epoch [0/200], Step [180/555], Loss: 5.9258
Epoch [0/200], Step [190/555], Loss: -0.0054
Epoch [0/200], Step [200/555], Loss: 1.9703
Epoch [0/200], Step [210/555], Loss: 2.4399
Epoch [0/200], Step [220/555], Loss: 2.0939
Epoch [0/200], Step [230/555], Loss

In [38]:
import torch.optim as optim
train_dataset = CustomDataset(dataset_dir, transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Initialize the model, loss function, and optimizer
num_classes = 36  # 0-9 and A-Z
model = CRNN(imgH=32, nc=1, nclass=num_classes, nh=256)
model.load_state_dict(torch.load(r"D:\data_cpv\char_trained_models\character_recognition_crnn.pth"))
criterion = nn.CTCLoss()
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.0001,momentum= 0.9, weight_decay=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    i = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = torch.tensor(labels, dtype=torch.int32).to(device)
        
        optimizer.zero_grad()
        outputs = model(images)

        # Convert labels and outputs for CTCLoss
        input_lengths = torch.full((outputs.size(1),), outputs.size(0), dtype=torch.int32).to(device)
        target_lengths = torch.full((labels.size(0),), 1, dtype=torch.int32).to(device)

        loss = criterion(outputs, labels, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        i += 1
        if i % 10 == 0:
            print(f"Epoch [{epoch}/{num_epochs}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}")

    lr_scheduler.step()
    if epoch%10 ==0 : torch.save(model.state_dict(), r"char_trained_models\character_recognition_crnn_epoch_{}.pth".format(epoch))
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

print("Training complete.")
torch.save(model.state_dict(), r'char_trained_models\character_recognition_crnn.pth')




  labels = torch.tensor(labels, dtype=torch.int32).to(device)


Epoch [0/100], Step [10/1110], Loss: 0.8289
Epoch [0/100], Step [20/1110], Loss: 3.1424
Epoch [0/100], Step [30/1110], Loss: 2.7600
Epoch [0/100], Step [40/1110], Loss: -0.1629
Epoch [0/100], Step [50/1110], Loss: 5.0558
Epoch [0/100], Step [60/1110], Loss: -3.6743
Epoch [0/100], Step [70/1110], Loss: 5.2722
Epoch [0/100], Step [80/1110], Loss: -1.7651
Epoch [0/100], Step [90/1110], Loss: 7.2428
Epoch [0/100], Step [100/1110], Loss: 2.2092
Epoch [0/100], Step [110/1110], Loss: -0.9081
Epoch [0/100], Step [120/1110], Loss: 4.5387
Epoch [0/100], Step [130/1110], Loss: 0.3977
Epoch [0/100], Step [140/1110], Loss: 5.5969
Epoch [0/100], Step [150/1110], Loss: -2.6338
Epoch [0/100], Step [160/1110], Loss: 5.2419
Epoch [0/100], Step [170/1110], Loss: 4.7174
Epoch [0/100], Step [180/1110], Loss: 1.0808
Epoch [0/100], Step [190/1110], Loss: 2.0268
Epoch [0/100], Step [200/1110], Loss: 1.3814
Epoch [0/100], Step [210/1110], Loss: 4.3026
Epoch [0/100], Step [220/1110], Loss: -0.3360
Epoch [0/100]