# importing libraries

In [None]:
import os
import numpy as np
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import transforms 
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import pathlib
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
from pprint import pprint
import torchinfo
from torch.autograd import Variable
from pytorch_model_summary import summary
from tqdm import tqdm
import random
from torchvision.utils import make_grid
import torchvision.transforms.functional as TF
import cv2

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Preprocessing applied to every image: collapse to a single grey channel,
# convert to a float tensor, then normalise that channel with mean 0.5 / std 0.5.
transformer = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# path to dataset

In [None]:
# Dataset roots. ImageFolder expects one sub-directory per class under each root.
# The placeholders are real string literals so the cell is at least valid Python.
train_path = 'path/to/train'  # TODO: set to the training dataset root
test_path = 'path/to/test'  # TODO: set to the testing dataset root

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=16,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
# Evaluation keeps the dataset order. batch_size is given explicitly because
# DataLoader defaults to 1, which would push test images through one at a time.
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=transformer),
    batch_size=16,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# label dictionary

In [None]:
# Class index -> character. Indices follow the order of the string below:
# digits, then upper-case letters, then lower-case letters. Characters that do
# not appear here (e.g. 0, 1, I, O, g, i, l, o, z) are not part of the label set.
labels_map = dict(
    enumerate(
        "23456789"
        "ABCDEFGHJKLMPQRSTUVWXY"
        "abcdefhjkmnpqrstuvwxy"
    )
)

In [None]:
# Preview four training images in a 2x2 grid as a sanity check of the
# loading/transform pipeline.
fig = plt.figure(figsize=(10, 10))
b = np.random.randint(150, 3400)  # random stride between the previewed samples
preview_set = train_loader.dataset
for i in range(4):
    # Wrap the index so a large stride cannot run past the end of the dataset.
    img, _ = preview_set[(i * b) % len(preview_set)]
    fig.add_subplot(2, 2, i + 1)
    # Move axis 0 (channels, as produced by ToTensor) last, which is what imshow expects.
    plt.imshow(img.permute(1, 2, 0), cmap="gray")

plt.show()

In [None]:
# Class names: the names of the entries directly under the training root, sorted.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir())
print(classes)
print(len(classes))

In [None]:
class CNNModel(nn.Module):
    """Small CNN classifier that returns per-class log-probabilities.

    Three 3x3 convolutions (the first two each followed by 2x2 max-pooling)
    feed a three-layer fully connected head ending in ``LogSoftmax``.

    The first linear layer expects 8192 flattened features. A single-channel
    35x35 input produces exactly that: 35 -> 17 -> 8 after the two poolings,
    and 128 channels * 8 * 8 = 8192. Other input sizes will not match.

    Because the output is already log-probabilities, the matching loss is
    ``nn.NLLLoss``.

    Args:
        num_classes: Number of output classes (size of the last linear layer).
        dropout: Drop probability used by every dropout layer.
    """

    def __init__(self, num_classes: int = 51, dropout: float = 0.4):
        super().__init__()
        # Layer order (and therefore the Sequential indices used in state_dict
        # keys such as "network.12.weight") is kept stable on purpose so that
        # previously saved checkpoints still load.
        self.network = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(8192, 128),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(256, num_classes),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return log-probabilities of shape ``(batch, num_classes)`` for ``x``."""
        return self.network(x)

In [None]:
# Build the network, then move its parameters onto the selected device.
model = CNNModel()
model = model.to(device)

In [None]:
# Print torchinfo's summary of the model for a batch of one single-channel 35x35 input.
pprint(torchinfo.summary(model, input_size=(1, 1, 35, 35)))

In [None]:
# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# The model already ends in LogSoftmax, so its outputs are log-probabilities.
# NLLLoss is the loss that consumes log-probabilities directly; CrossEntropyLoss
# would apply log-softmax a second time on top of the model's own.
loss_fn = nn.NLLLoss()

In [None]:
# Sample counts used as accuracy/loss denominators. Ask the datasets themselves:
# the previous glob only counted *.png files and, without recursive=True, treats
# "**" like "*", so it could disagree with what the loaders actually yield.
train_count = len(train_loader.dataset)
test_count = len(test_loader.dataset)
print(train_count)
print(test_count)

In [None]:
# Show the dataset object backing the training loader.
print(train_loader.dataset)

# training model

In [None]:
# Train for a fixed number of epochs, evaluating on the test set after each one.
epochs = 10
acc = 0.0

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_acc = 0.0
    train_loss = 0.0
    train_seen = 0
    for images, labels in train_loader:
        # Use the selected device instead of a hard-coded .cuda(), so the loop
        # also runs on CPU-only machines.
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_len = labels.size(0)
        # loss is a batch mean; weight it by batch size so the epoch figure is
        # a per-sample mean. .item() keeps the running total a plain float.
        train_loss += loss.item() * batch_len
        prediction = outputs.argmax(dim=1)
        train_acc += (prediction == labels).sum().item()
        train_seen += batch_len
    # Divide by the number of samples actually processed (guarding an empty loader).
    train_acc = train_acc / max(train_seen, 1)
    train_loss = train_loss / max(train_seen, 1)

    # ---- evaluation pass ----
    model.eval()
    test_acc = 0
    test_seen = 0
    print('|----------------------------------------------------------------------------------------|')
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            test_acc += (prediction == labels).sum().item()
            test_seen += labels.size(0)
    test_acc = test_acc / max(test_seen, 1)
    print(f'epoch {epoch+1} training loss {train_loss} training accuracy {train_acc}')
    print(f'testing accuracy {test_acc}')

In [None]:
# Training accuracy left over from the last epoch run by the training loop.
print(train_acc)

In [None]:
# Training loss left over from the last epoch run by the training loop.
print(train_loss)

In [None]:
# Show four test images in a 2x2 grid, each titled with the predicted character.
fig = plt.figure(figsize=(5, 5))
b1 = np.random.randint(50, 100)  # random stride between the displayed samples
demo_set = test_loader.dataset
# Inference only: disable dropout and gradient tracking explicitly rather than
# relying on the state an earlier cell happened to leave the model in.
model.eval()
with torch.no_grad():
    for i in range(4):
        # Wrap the index so the stride cannot run past the end of the dataset.
        img, _ = demo_set[(i * b1) % len(demo_set)]
        img1 = img.unsqueeze(0)  # add a leading batch dimension of size 1
        print(img1.shape)
        # Follow the selected device instead of a hard-coded .cuda().
        outputs = model(img1.to(device))
        prediction = outputs.argmax(dim=1)
        fig.add_subplot(2, 2, i + 1)
        plt.imshow(img.permute(1, 2, 0), cmap="gray")
        plt.title(labels_map[prediction.item()])

plt.show()

# saving the model

In [None]:
# Save only the model's state_dict (not the module object) to captcha_model.pth.
torch.save(model.state_dict(), 'captcha_model.pth')