In [None]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim



In [None]:
transform =transforms.Compose([
    transforms.ToTensor(),
])

trainset = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/cifar-10-python', train=True, transform=transform)
trainloader=torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True)

testset=torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/cifar-10-python',train=False,transform=transform)

testloader=torch.utils.data.DataLoader(testset,batch_size=64,shuffle=True)


# print((trainloader[0]))

## traindata is containing 50000 images 3x32x32 and their label
## test data is containing 10000 images of same dimension

In [None]:


## model defintion

class MLP(nn.Module):
    def __init__(self):
        super(MLP,self).__init__()
        self.flatten=nn.Flatten()
        self.fc1=nn.Linear(32*32*3,256)
        self.fc2=nn.Linear(256,128)
        self.fc3=nn.Linear(128,10)

    def forward(self,x):
        x=self.flatten(x)
        x=torch.relu(self.fc1(x))
        x=torch.relu(self.fc2(x))
        x=self.fc3(x)

        return x


use_cuda=torch.cuda.is_available()
device=torch.device("cuda:0" if use_cuda else "cpu")
if use_cuda:
    model=MLP().cuda()

loss_function=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=0.0001)


Accuracies=[]

## Training
print("Training started")
for epoch in range(20):
    print("epoch=",epoch+1," is running")
    loss=0.0
    for i,data in enumerate(trainloader,1):
        inputs,labels=data[0].to(device),data[1].to(device)
        # print(i)

        optimizer.zero_grad()

        ouputs=model(inputs)
        loss=loss_function(ouputs,labels)
        loss.backward()
        optimizer.step()


        loss+=loss.item()
    print("training finished")

# evaluation of the trained model
    correct=0
    total=0
    with torch.no_grad():
        for data in testloader:
            images,labels=data[0].to(device),data[1].to(device)
            outputs=model(images)
            _,predicted=torch.max(outputs.data,1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    print('Accuracy of the network on the 10000 test images: %d %% \n' % (100 * correct / total))
    Accuracies.append((100 * correct / total))



Training started
epoch= 1  is running
training finished
Accuracy of the network on the 10000 test images: 36 % 

epoch= 2  is running
training finished
Accuracy of the network on the 10000 test images: 39 % 

epoch= 3  is running
training finished
Accuracy of the network on the 10000 test images: 41 % 

epoch= 4  is running
training finished
Accuracy of the network on the 10000 test images: 44 % 

epoch= 5  is running
training finished
Accuracy of the network on the 10000 test images: 45 % 

epoch= 6  is running
training finished
Accuracy of the network on the 10000 test images: 45 % 

epoch= 7  is running
training finished
Accuracy of the network on the 10000 test images: 45 % 

epoch= 8  is running
training finished
Accuracy of the network on the 10000 test images: 47 % 

epoch= 9  is running
training finished
Accuracy of the network on the 10000 test images: 46 % 

epoch= 10  is running
training finished
Accuracy of the network on the 10000 test images: 47 % 

epoch= 11  is running


In [None]:
print("MLP")
print('Accuracy of the MLP model on the test images: %.2f %%' % (100 * correct / total))
# print("\n\n")
# print(correct,total)

MLP
Accuracy of the MLP model on the test images: 51.47 %


In [None]:

# Define CNN architecture
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 8 * 8)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the CNN model
cnn_model = SimpleCNN()
cnn_model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train the CNN model
print("Training started...")
for epoch in range(20):  # Number of epochs
    print("epoch=",epoch+1,"is running")
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device),data[1].to(device)
        optimizer.zero_grad()
        outputs = cnn_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print("Training finished")

    # Evaluate the CNN model
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device),data[1].to(device)
            outputs = cnn_model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the CNN model on the test images: %.2f %%\n' % (100 * correct / total))


Training started...
epoch= 1 is running
Training finished
Accuracy of the CNN model on the test images: 52.74 %

epoch= 2 is running
Training finished
Accuracy of the CNN model on the test images: 58.94 %

epoch= 3 is running
Training finished
Accuracy of the CNN model on the test images: 63.87 %

epoch= 4 is running
Training finished
Accuracy of the CNN model on the test images: 64.95 %

epoch= 5 is running
Training finished
Accuracy of the CNN model on the test images: 68.36 %

epoch= 6 is running
Training finished
Accuracy of the CNN model on the test images: 68.82 %

epoch= 7 is running
Training finished
Accuracy of the CNN model on the test images: 68.12 %

epoch= 8 is running
Training finished
Accuracy of the CNN model on the test images: 69.83 %

epoch= 9 is running
Training finished
Accuracy of the CNN model on the test images: 69.04 %

epoch= 10 is running
Training finished
Accuracy of the CNN model on the test images: 69.53 %

epoch= 11 is running
Training finished
Accuracy o

In [None]:
print("CNN Architecture")
print('Accuracy of the CNN model on the test images: %.2f %%' % (100 * correct / total))
print("\n\n")

CNN Architecture
Accuracy of the CNN model on the test images: 67.17 %





In [None]:

# import torchvision.models as models

# # Load pre-trained VGG model
# vg_model = models.vgg16(pretrained=True)

# print(vg_model)

# # Check the output size of the last layer
# num_features = vg_model.classifier[-1].in_features
# print("Number of input features:", num_features)

# last_linear_idx = None
# for idx, layer in enumerate(vg_model.classifier):
#     if isinstance(layer, torch.nn.Linear):
#         last_linear_idx = idx
# print("last leayer index to be changed",last_linear_idx)


In [None]:
# import torch
# import torchvision
# import torchvision.transforms as transforms

# # Check if GPU is available
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# # Load pre-trained VGG model
# vg_model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

# # Freeze the pre-trained layers
# for param in vg_model.parameters():
#     param.requires_grad = False


# # Modify the last fully connected layer for 10 classes
# vg_model.classifier[6] = nn.Linear(4096, 10)  # Assuming VGG11's classifier has 4096 input features


# # Define transformations
# transform = transforms.Compose([
#     transforms.Resize(256),
#     transforms.CenterCrop(224),  # Resize images to match VGG input size
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])

# # Load CIFAR-10 dataset
# testset = torchvision.datasets.CIFAR10(root="/content/drive/MyDrive/cifar-10-python", train=False, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

# correct = 0
# total = 0
# with torch.no_grad():
#     for data in testloader:
#         images, labels = data[0].to(device), data[1].to(device)
#         outputs = vg_model(images)
#         _, predicted = torch.max(outputs, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#         print(predicted,labels)

# print('Accuracy of the VGG model on the test images: %.2f %%' % (100 * correct / total))


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained VGG model
vg_model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

# Freeze the pre-trained layers
for param in vg_model.parameters():
    param.requires_grad = False

features=list(vg_model.classifier.children())
features.extend([nn.Linear(1000,100)])
features.extend([nn.Linear(100,10)])
vg_model.classifier = nn.Sequential(*features)
print((vg_model))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:

# Define loss function and optimizer
vg_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vg_model.classifier[7:].parameters(), lr=0.0001)
# optimizer = optim.Adam(vg_model.classifier[7:].parameters(), lr=0.001)
#
# Train the CNN model
print("Training started...")

vg_model.train()
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
    inputs, labels = data[0].to(device),data[1].to(device)
    optimizer.zero_grad()
    outputs = vg_model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()

print("Training finished")

# Evaluate the CNN model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device),data[1].to(device)
        outputs = vg_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the VGG_16 model on the test images: %.2f %%\n' % (100 * correct / total))


Training started...
Training finished
Accuracy of the VGG_16 model on the test images: 38.42 %



In [42]:
import torch
import torchvision
import torchvision.transforms as transforms

# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load pre-trained VGG model
vg_model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

# Freeze the pre-trained layers
for param in vg_model.parameters():
    param.requires_grad = False

features=list(vg_model.classifier.children())[:-1]
features.extend([nn.Linear(4096,10)])
vg_model.classifier = nn.Sequential(*features)
print((vg_model))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [43]:

# Define loss function and optimizer
vg_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vg_model.classifier[-1].parameters(), lr=0.0001)
# optimizer = optim.Adam(vg_model.classifier[7:].parameters(), lr=0.001)
#
# Train the CNN model
print("Training started...")

vg_model.train()
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
    inputs, labels = data[0].to(device),data[1].to(device)
    optimizer.zero_grad()
    outputs = vg_model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()

print("Training finished")

# Evaluate the CNN model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device),data[1].to(device)
        outputs = vg_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the VGG_16 model on the test images: %.2f %%\n' % (100 * correct / total))


Training started...
Training finished
Accuracy of the VGG_16 model on the test images: 33.41 %



Comparison of Test Set Accuracy and Loss Across Models
Test Set Accuracy:

MLP (Multi-Layer Perceptron): 51.47%
CNN (Convolutional Neural Network): 67.17%
VGG-based model (Pretrained VGG16): 33%
Reasons Behind Differences in Performance:

MLP:
MLPs treat each pixel as an independent feature and do not capture spatial relationships in images.
Consequently, MLPs may struggle to extract meaningful features from images, resulting in lower accuracy.

CNN:
CNNs leverage convolutional layers to capture local patterns and spatial hierarchies in images.
This spatial understanding allows CNNs to learn more robust and discriminative features, leading to higher accuracy.
VGG-based model:

Fine-tuning a pre-trained VGG16 model .
It requires careful adjustment of hyperparameters.
I added linera layer to fine tune. It did not adapt well and give limited performance compare to MLP.
But it took very less time to train. This is very useful to use in real time applicaion.


Analysis:

CNNs utilize convolutional layers to capture spatial structure, making them more effective for image-related tasks.
MLPs, lacking this capability, struggle to extract meaningful features from images.
Transfer Learning with VGG:

In summary, CNNs outperform MLPs for image classification due to their ability to capture spatial structure. While transfer learning with VGG can be beneficial, it requires careful fine-tuning to achieve optimal performance on the target dataset.