In [4]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
import torch.nn.functional as F
from utilities import utils, process, evaluate, modify, plot
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader,ConcatDataset
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim

In [3]:
data_path = '/content/drive/MyDrive/Arabic-Handwritten-Text-Identification-Using-Deep-Learning-main/Arabic-Handwritten-Text-Identification-Using-Deep-Learning-main/data'

In [2]:
%cp -r /content/drive/MyDrive/Arabic-Handwritten-Text-Identification-Using-Deep-Learning-main/Arabic-Handwritten-Text-Identification-Using-Deep-Learning-main/utilities .

In [5]:
df = utils.load_images_to_dataframe(data_path + '/preprocessed')

# Encode the labels
label_encoder = LabelEncoder()
df['Target'] = label_encoder.fit_transform(df['Target'])

# Split the data into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# Further split the training set into training and validation sets
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)


Unnamed: 0_level_0,image,Target
filename,Unnamed: 1_level_1,Unnamed: 2_level_1
user002_mustadhafeen_030.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",1
user071_qashtah_029.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",70
user073_ghaleez_019.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",72
user003_shateerah_031.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",2
user004_sakhar_016.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",3


In [6]:
# Custom Dataset class
class ImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Access the stored image data in grayscale
        image = self.dataframe.iloc[idx, 0]  # 'image' column has the image data (grayscale)
        label = self.dataframe.iloc[idx, 1]  # 'Target' column has the label

        # Convert grayscale image to RGB if needed
        image = np.expand_dims(image, axis=-1)  # Add channel dimension (H, W, 1)
        image = np.repeat(image, 3, axis=-1)  # Convert to RGB by duplicating the grayscale channel

        # Convert numpy array to PIL image
        image = Image.fromarray(image)

        # Apply transformations if any
        if self.transform:
            image = self.transform(image)

        return image, label

**Data Augmentation**

In [7]:
transform_group1 = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
])

transform_group2 = transforms.Compose([
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
])

# Group 2: Color Jitter
transform_group3 = transforms.Compose([
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
])

# Group 3: Resize
transform_group4 = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

In [8]:
dataset1 = ImageDataset(dataframe=train_df, transform=transform_group1)
dataset2 = ImageDataset(dataframe=train_df, transform=transform_group2)
dataset3 = ImageDataset(dataframe=train_df, transform=transform_group3)
dataset4 = ImageDataset(dataframe=train_df, transform=transform_group4)

In [9]:
combined_dataset = ConcatDataset([dataset1, dataset2, dataset3])

In [10]:
dataloader = DataLoader(combined_dataset, batch_size=32, shuffle=True)

In [12]:
print(df['Target'].unique())

[ 1 70 72  2  3 73 31  4  9 16 49  0 11 27 37 14 80 76 39 19 69 41 79 62
 25 48 52 36 44 35 26 60 45 81  7 40 24 57  8 55 68 78 63 58 23 64 33  5
 50 51 66 15 29 32 12 53 30 74 38 21 56 42 17 28  6 59 65 71 75 47 18 46
 10 34 77 13 67 61 43 54 22 20]


In [6]:
# Define the transformation
# transform = transforms.Compose([
#     transforms.Resize((128, 128)),
#     transforms.ToTensor(),
# ])

# # Create the dataset and dataloader
# dataset = ImageDataset(dataframe=train_df, transform=transform)

# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [16]:
# Define the model class
class SimpleCNN(nn.Module):
    def __init__(self, num_filters, kernel_size, stride, padding, fc_neurons, dropout_rate, pool_size, pool_type="max"):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, num_filters, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv2 = nn.Conv2d(num_filters, num_filters * 2, kernel_size=kernel_size, stride=stride, padding=padding)

        # Pooling layer selection (max or average pooling)
        if pool_type == "max":
            self.pool = nn.MaxPool2d(kernel_size=pool_size, stride=pool_size)
        elif pool_type == "avg":
            self.pool = nn.AvgPool2d(kernel_size=pool_size, stride=pool_size)

        self.dropout = nn.Dropout(dropout_rate)

        self.fc1 = nn.Linear(num_filters * 2 * 16 * 16, fc_neurons)  # Initialize with a default size
        self.fc2 = nn.Linear(fc_neurons, 82)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Dynamically determine the input size for fc1
        x = x.view(x.size(0), -1)  # Flatten the output
        if x.shape[1] != self.fc1.in_features:
            self.fc1 = nn.Linear(x.shape[1], self.fc2.in_features)  # Adjust fc1 if needed
            print("fc1 input size adjusted to:", x.shape[1])

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

    def _calculate_fc1_input_size(self, num_filters, kernel_size, stride, padding, pool_size):
        # Create a dummy input tensor
        dummy_input = torch.randn(1, 3, 128, 128)

        # Pass the dummy input through the convolutional and pooling layers
        dummy_output = self.pool(F.relu(nn.Conv2d(3, num_filters, kernel_size=kernel_size, stride=stride, padding=padding)(dummy_input)))
        dummy_output = self.pool(F.relu(nn.Conv2d(num_filters, num_filters * 2, kernel_size=kernel_size, stride=stride, padding=padding)(dummy_output)))

        # Calculate the flattened size of the output
        return dummy_output.view(1, -1).shape[1]



In [12]:
# Baseline configuration
base_config = {
    "num_filters": 16,
    "kernel_size": 3,
    "stride": 1,
    "padding": 1,
    "fc_neurons": 128,
    "dropout_rate": 0,
    "pool_size": 2,
    "pool_type": "max",
    "learning_rate": 0.001,
    "batch_size": 32,
    "weight_decay": 0.0,
    "optimizer_name": "Adam",
}

# Parameters to tune (one at a time)
parameters_to_tune = {
    "num_filters": [16, 32],
    "kernel_size": [3, 5],
    "stride": [1,2],
    "padding": [0,1],
    "fc_neurons": [128,256],
    "dropout_rate": [0,0.2],
    "pool_size": [2,3],
    "pool_type": ["avg"],
    "learning_rate": [0.001,0.0005],
    "batch_size": [32,64],
    "weight_decay": [0.0,0.01],
    "optimizer_name": ["Adam","SGD"],
}

In [17]:
for param, values in parameters_to_tune.items():
    for value in values:
        # Update the current parameter in the base config
        current_config = base_config.copy()
        current_config[param] = value

        # Create the dataloader with the updated batch size
        dataloader = DataLoader(combined_dataset, batch_size=current_config["batch_size"], shuffle=True)

        # Initialize the model
        model = SimpleCNN(
            num_filters=current_config["num_filters"],
            kernel_size=current_config["kernel_size"],
            stride=current_config["stride"],
            padding=current_config["padding"],
            fc_neurons=current_config["fc_neurons"],
            dropout_rate=current_config["dropout_rate"],
            pool_size=current_config["pool_size"],
            pool_type=current_config["pool_type"],
        )

        # Define optimizer
        if current_config["optimizer_name"] == "Adam":
            optimizer = optim.Adam(model.parameters(), lr=current_config["learning_rate"], weight_decay=current_config["weight_decay"])
        elif current_config["optimizer_name"] == "SGD":
            optimizer = optim.SGD(model.parameters(), lr=current_config["learning_rate"], weight_decay=current_config["weight_decay"])

        # Define loss function
        criterion = nn.CrossEntropyLoss()

        # Training loop
        num_epochs = 5
        loss_per_epoch = []

        for epoch in range(num_epochs):
            running_loss = 0.0
            for images, labels in dataloader:
                optimizer.zero_grad()
                labels = labels.long()

                # Forward pass (fc1 adjustment happens here if needed)
                outputs = model(images)

                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            avg_loss = running_loss / len(dataloader)
            loss_per_epoch.append(avg_loss)
            print(f'Tuning {param}={value}, Epoch={epoch+1}, Loss={avg_loss}')


        # Save the model
        model_name = f'model_{param}{value}.pth'
        torch.save(model, os.path.join('/content/drive/MyDrive/cnn_models_augmented_data', model_name))

        # Save the loss vs. epoch data
        log_name = f'loss_{param}{value}.txt'
        with open(os.path.join('/content/drive/MyDrive/logs_augmented', log_name), 'w') as f:
            for epoch, loss in enumerate(loss_per_epoch, 1):
                f.write(f'Epoch {epoch}, Loss: {loss}\n')

fc1 input size adjusted to: 65536
Tuning num_filters=16, Epoch=1, Loss=4.330350749275901
Tuning num_filters=16, Epoch=2, Loss=3.8886219306425613
Tuning num_filters=16, Epoch=3, Loss=3.716588181582364
Tuning num_filters=16, Epoch=4, Loss=3.629050475467335
Tuning num_filters=16, Epoch=5, Loss=3.5748000621795653
fc1 input size adjusted to: 131072
Tuning num_filters=32, Epoch=1, Loss=4.304433562972329


KeyboardInterrupt: 

In [10]:
#Save the model's state_dict
#torch.save(model.state_dict(), '/content/drive/MyDrive/cnn_proj_models/cnn_test')

#To load it later:
model = SimpleCNN(
    num_filters=base_config["num_filters"],
    kernel_size=base_config["kernel_size"],
    stride=base_config["stride"],
    padding=base_config["padding"],
    fc_neurons=256,
    dropout_rate=0.3,
    pool_size=base_config["pool_size"],
    pool_type=base_config["pool_type"]
)
model.load_state_dict(torch.load('/content/drive/MyDrive/cnn_proj_models/model_fc_neurons256.pth'))


  model.load_state_dict(torch.load('/content/drive/MyDrive/cnn_proj_models/model_fc_neurons256.pth'))


<All keys matched successfully>

In [11]:
# Assuming you already have your dataset and dataloaders ready
# Example: train_loader, val_loader

# Define your loss function and optimizer
criterion = nn.CrossEntropyLoss()  # or the appropriate loss for your task
optimizer = optim.Adam(model.parameters(), lr=1e-4)  # Adjust learning rate if needed

# Number of additional epochs to train
num_epochs = 15  # Update this to your desired number

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for inputs, labels in dataloader:
        optimizer.zero_grad()  # Clear gradients from the previous step
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader)}")

Epoch 1/15, Loss: 0.5153155087938776
Epoch 2/15, Loss: 0.393408422631657
Epoch 3/15, Loss: 0.3312929634247785
Epoch 4/15, Loss: 0.2804775101173183
Epoch 5/15, Loss: 0.2543026895829193
Epoch 6/15, Loss: 0.2391717100981623
Epoch 7/15, Loss: 0.2140477980517418
Epoch 8/15, Loss: 0.20052805420456696
Epoch 9/15, Loss: 0.1771318913720872
Epoch 10/15, Loss: 0.1591620927312128
Epoch 11/15, Loss: 0.15080908696045695
Epoch 12/15, Loss: 0.13345368404913208
Epoch 13/15, Loss: 0.12577146495206523
Epoch 14/15, Loss: 0.11702878720572461
Epoch 15/15, Loss: 0.11167127481641491


In [13]:
torch.save(model.state_dict(), '/content/drive/MyDrive/cnn_proj_models/fc_neurons_256_epochs_15')

In [None]:
df['Target'].value_counts()

Unnamed: 0_level_0,count
Target,Unnamed: 1_level_1
1,100
68,100
21,100
38,100
74,100
...,...
25,100
62,100
20,100
47,94


In [12]:
val_dataset = ImageDataset(dataframe=val_df, transform=transform)
test_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)


model.eval()

# No need to track gradients during inference
correct_predictions = 0
total_predictions = 0

# Iterate over the test dataset
with torch.no_grad():  # Disable gradient calculations for evaluation
    for images, labels in test_dataloader:
        labels = labels.long()  # Ensure the labels are of type Long (int64)

        # Forward pass
        outputs = model(images)

        # Get the predicted class with the highest probability
        _, predicted = torch.max(outputs, 1)

        # Count correct predictions
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

# Calculate the accuracy
accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the val set: {accuracy:.2f}%')

Accuracy on the val set: 30.98%


In [None]:
test_dataset = ImageDataset(dataframe=test_df, transform=transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


model.eval()

# No need to track gradients during inference
correct_predictions = 0
total_predictions = 0

# Iterate over the test dataset
with torch.no_grad():  # Disable gradient calculations for evaluation
    for images, labels in test_dataloader:
        labels = labels.long()  # Ensure the labels are of type Long (int64)

        # Forward pass
        outputs = model(images)

        # Get the predicted class with the highest probability
        _, predicted = torch.max(outputs, 1)

        # Count correct predictions
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

# Calculate the accuracy
accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the val set: {accuracy:.2f}%')