In [5]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
import torch.nn.functional as F
from utilities import utils, process, evaluate, modify, plot
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim

In [2]:
df = utils.load_images_to_dataframe('data/preprocessed')

# Encode the labels
label_encoder = LabelEncoder()
df['Target'] = label_encoder.fit_transform(df['Target'])

# Split the data into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# Further split the training set into training and validation sets
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)


df.head(5)

Unnamed: 0_level_0,image,Target
filename,Unnamed: 1_level_1,Unnamed: 2_level_1
user001_abjadiyah_031.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",0
user001_abjadiyah_032.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",0
user001_abjadiyah_033.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",0
user001_abjadiyah_034.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",0
user001_abjadiyah_035.png,"[[255, 255, 255, 255, 255, 255, 255, 255, 255,...",0


In [11]:
# Custom Dataset class
class ImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Access the stored image data in grayscale
        image = self.dataframe.iloc[idx, 0]  # 'image' column has the image data (grayscale)
        label = self.dataframe.iloc[idx, 1]  # 'Target' column has the label

        # Convert grayscale image to RGB if needed
        image = np.expand_dims(image, axis=-1)  # Add channel dimension (H, W, 1)
        image = np.repeat(image, 3, axis=-1)  # Convert to RGB by duplicating the grayscale channel

        # Convert numpy array to PIL image
        image = Image.fromarray(image)

        # Apply transformations if any
        if self.transform:
            image = self.transform(image)

        return image, label

In [14]:
print(df['Target'].unique())


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81]


In [15]:




# Define the transformation
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Create the dataset and dataloader
dataset = ImageDataset(dataframe=df, transform=transform)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(32 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 82)  # Adjust to 82 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 32 * 32)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in dataloader:
        optimizer.zero_grad()
        
        # Ensure the labels are of type Long (int64)
        labels = labels.long()  # Convert labels to Long type
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}')

print('Finished Training')


Epoch 1, Loss: 3.9647690782360003
Epoch 2, Loss: 2.901468084372726
Epoch 3, Loss: 1.6404979584263821
Epoch 4, Loss: 0.6163130083504845
Epoch 5, Loss: 0.14525239270399598
Epoch 6, Loss: 0.024040215950934033
Epoch 7, Loss: 0.005312544948366635
Epoch 8, Loss: 0.0018021448246002489
Epoch 9, Loss: 0.0009643208915732947
Epoch 10, Loss: 0.0006225368667704364
Finished Training


In [6]:
df['Target'].value_counts()

Target
71    100
1     100
2     100
55    100
39    100
     ... 
59    100
60    100
8     100
47     94
58     50
Name: count, Length: 82, dtype: int64

In [17]:
test_dataset = ImageDataset(dataframe=test_df, transform=transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


model.eval()

# No need to track gradients during inference
correct_predictions = 0
total_predictions = 0

# Iterate over the test dataset
with torch.no_grad():  # Disable gradient calculations for evaluation
    for images, labels in test_dataloader:
        labels = labels.long()  # Ensure the labels are of type Long (int64)
        
        # Forward pass
        outputs = model(images)
        
        # Get the predicted class with the highest probability
        _, predicted = torch.max(outputs, 1)
        
        # Count correct predictions
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

# Calculate the accuracy
accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the test set: {accuracy:.2f}%')

Accuracy on the test set: 100.00%
