<a href="https://colab.research.google.com/github/warm007/CIIC5015-Project-2_WilliamRodriguez/blob/main/P2_Classification_William_Rodriguez.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# William A. Rodríguez Mercado
# CIIC 5015 Sec 096

# **Classification**

In [5]:
#Imports
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F



# Hyperparameters: input size, number of classes, number of epochs,
# batch size and learning rate.
input_size = 28 * 28    # images are flattened to 28*28 = 784 values before the first layer
num_classes = 10        # size of the output layer of every network
num_epochs = 20         # full passes over the training loader
batch_size = 100        # samples per batch for both data loaders
learning_rate = 0.01    # step size handed to the Adam optimizers

In [6]:
# MNIST datasets: both splits live under ../../data and are converted to
# tensors; only the training split is created with download=True.
train_dataset = torchvision.datasets.MNIST(
    '../../data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(
    '../../data', train=False, transform=transforms.ToTensor())

# Data loaders: training batches are shuffled, test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One TensorBoard writer (and log directory) per model.
writer1 = SummaryWriter(log_dir='runs/Template')   # template network
writer2 = SummaryWriter(log_dir='runs/Network1')   # second model (Network1)
writer3 = SummaryWriter(log_dir='runs/Network2')   # third model (Network2)
writer4 = SummaryWriter(log_dir='runs/Network3')   # fourth model (Network3)

# **Template: 3-layer implementation of a network**

In [8]:
# Fully connected neural network
class NeuralNet(nn.Module):
    """Template network: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of the single hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw fc2 scores for ``x``; no softmax is applied here."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# **Network #1: 4-layer network**


*   Layer 1 – 20 neurons
*   Layer 2 – 50 neurons
*   Layer 3 – 20 neurons
*   Layer 4 – output neuron with softmax activation



In [9]:
class Network1(nn.Module):
    """4-layer fully connected network: input -> 20 -> 50 -> 20 -> num_classes.

    Args:
        input_size: number of features in each input row.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 20)
        self.fc2 = nn.Linear(20, 50)
        self.fc3 = nn.Linear(50, 20)
        self.fc4 = nn.Linear(20, num_classes)

    def forward(self, x):
        """Apply ReLU after each hidden layer and return the raw fc4 scores."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        # No activation on the output layer: the scores are returned as-is.
        return self.fc4(hidden)

# **Network #2: 6-layer network**

*   Layer 1 – 10 neurons
*   Layer 2 – 20 neurons
*   Layer 3 – 30 neurons
*   Layer 4 – 20 neurons
*   Layer 5 – 10 neurons
*   Layer 6 – output neuron with softmax activation


In [10]:
class Network2(nn.Module):
    """6-layer fully connected network: input -> 10 -> 20 -> 30 -> 20 -> 10 -> num_classes.

    Args:
        input_size: number of features in each input row.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 20)
        self.fc3 = nn.Linear(20, 30)
        self.fc4 = nn.Linear(30, 20)
        self.fc5 = nn.Linear(20, 10)
        self.fc6 = nn.Linear(10, num_classes)

    def forward(self, x):
        """Apply ReLU after each hidden layer and return the raw fc6 scores."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            hidden = F.relu(layer(hidden))
        # No softmax is applied here; the output-layer scores are returned
        # unnormalized (the notebook trains with nn.CrossEntropyLoss on them).
        return self.fc6(hidden)

# **Network #3: 6-layer network**

*   Layer 1 – 10 neurons
*   Layer 2 – 40 neurons
*   Layer 3 – 70 neurons
*   Layer 4 – 40 neurons
*   Layer 5 – 10 neurons
*   Layer 6 – output neuron with softmax activation



In [11]:
class Network3(nn.Module):
    """6-layer fully connected network: input -> 10 -> 40 -> 70 -> 40 -> 10 -> num_classes.

    Args:
        input_size: number of features in each input row.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 40)
        self.fc3 = nn.Linear(40, 70)
        self.fc4 = nn.Linear(70, 40)
        self.fc5 = nn.Linear(40, 10)
        self.fc6 = nn.Linear(10, num_classes)

    def forward(self, x):
        """Apply ReLU after each hidden layer and return the raw fc6 scores."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            hidden = F.relu(layer(hidden))
        # No activation on the output layer: the scores are returned as-is.
        return self.fc6(hidden)

Init Models:

In [12]:
# Every model is moved to `device` because train_model/test_model move each
# batch there with .to(device); a model left on the CPU would fail with a
# device-mismatch error as soon as `device` is a CUDA device.
Network1Model = Network1(input_size, num_classes).to(device)

Network2Model = Network2(input_size, num_classes).to(device)

Network3Model = Network3(input_size, num_classes).to(device)



#  Training and Testing Functions





In [13]:
# Train the model
def train_model(model, criterion, optimizer, train_loader, device, num_epochs, writer):
    """Train ``model`` and log the training loss to TensorBoard.

    Each batch of images is flattened to rows of 28*28 values and moved to
    ``device`` together with its labels before the forward pass.

    Two scalar series are written to ``writer``:
      * ``'Loss/train_batch'``: the loss of every batch, indexed by a global
        batch counter (``epoch * len(train_loader) + batch_index``) so that
        every point gets its own step.
      * ``'Loss/train'``: the mean batch loss of each epoch, indexed by the
        1-based epoch number.

    Args:
        model: network to optimize; must already be on ``device``.
        criterion: callable returning the loss for ``(outputs, labels)``.
        optimizer: optimizer built over ``model``'s parameters.
        train_loader: sized iterable of ``(images, labels)`` batches.
        device: device the batches are moved to.
        num_epochs: number of passes over ``train_loader``.
        writer: object exposing ``add_scalar(tag, value, step)``.
    """
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        model.train()
        total_loss, count = 0.0, 0
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backpropagation and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the scalar once; it is reused for logging, averaging and printing.
            batch_loss = loss.item()

            # Per-batch curve gets its own tag and a unique, increasing step so
            # it cannot collide with the per-epoch average logged below.
            writer.add_scalar('Loss/train_batch', batch_loss, epoch * total_step + i)

            total_loss += batch_loss
            count += 1

            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, batch_loss))

        # An empty loader produces no batches; skip the average instead of dividing by zero.
        if count:
            writer.add_scalar('Loss/train', total_loss/count, epoch + 1)


In [14]:
# Test the model
def test_model(model, test_loader, device, writer, epoch):
    """Evaluate ``model`` on ``test_loader`` and log its accuracy.

    Each batch of images is flattened to rows of 28*28 values and moved to
    ``device``; the predicted class of a sample is the index of its largest
    output score. The accuracy (in percent) is printed and written to
    ``writer`` under the ``'accuracy'`` tag at step ``epoch``.

    If the loader yields no samples, a notice is printed and nothing is
    logged, because the accuracy is undefined in that case.

    Args:
        model: network to evaluate; must already be on ``device``.
        test_loader: iterable of ``(images, labels)`` batches.
        device: device the batches are moved to.
        writer: object exposing ``add_scalar(tag, value, step)``.
        epoch: step value under which the accuracy is logged.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            # Index of the highest score along the class dimension.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Avoid dividing by zero when there is nothing to evaluate.
        print('No test samples were provided; accuracy was not computed.')
        return

    accuracy = 100 * correct / total
    # Report the number of samples actually evaluated rather than a fixed count.
    print('Accuracy of the network on the {} test images: {} %'.format(total, accuracy))
    writer.add_scalar('accuracy', accuracy, epoch)


# The name starts with "test_", which pytest would otherwise collect as a
# test case; this marks the function as a regular helper.
test_model.__test__ = False

# Training and Testing Results Network1

In [15]:
# Cross-entropy loss (reused by the later training cells) and an Adam optimizer
# for Network1; losses are logged through writer2 (runs/Network1).
criterion = nn.CrossEntropyLoss()
optimizer1 = torch.optim.Adam(params=Network1Model.parameters(), lr=learning_rate)
train_model(
    model=Network1Model,
    criterion=criterion,
    optimizer=optimizer1,
    train_loader=train_loader,
    device=device,
    num_epochs=num_epochs,
    writer=writer2,
)

Epoch [1/20], Step [100/600], Loss: 0.4792
Epoch [1/20], Step [200/600], Loss: 0.1667
Epoch [1/20], Step [300/600], Loss: 0.4245
Epoch [1/20], Step [400/600], Loss: 0.2301
Epoch [1/20], Step [500/600], Loss: 0.2292
Epoch [1/20], Step [600/600], Loss: 0.2453
Epoch [2/20], Step [100/600], Loss: 0.1446
Epoch [2/20], Step [200/600], Loss: 0.1617
Epoch [2/20], Step [300/600], Loss: 0.1406
Epoch [2/20], Step [400/600], Loss: 0.2659
Epoch [2/20], Step [500/600], Loss: 0.2897
Epoch [2/20], Step [600/600], Loss: 0.1376
Epoch [3/20], Step [100/600], Loss: 0.1916
Epoch [3/20], Step [200/600], Loss: 0.0366
Epoch [3/20], Step [300/600], Loss: 0.2053
Epoch [3/20], Step [400/600], Loss: 0.1807
Epoch [3/20], Step [500/600], Loss: 0.0917
Epoch [3/20], Step [600/600], Loss: 0.2140
Epoch [4/20], Step [100/600], Loss: 0.1223
Epoch [4/20], Step [200/600], Loss: 0.3844
Epoch [4/20], Step [300/600], Loss: 0.1767
Epoch [4/20], Step [400/600], Loss: 0.0737
Epoch [4/20], Step [500/600], Loss: 0.1029
Epoch [4/20

In [17]:
# Evaluate Network1 on the test loader; the accuracy is logged at step num_epochs.
test_model(
    model=Network1Model,
    test_loader=test_loader,
    device=device,
    writer=writer2,
    epoch=num_epochs,
)

Accuracy of the network on the 10000 test images: 95.83 %


# Training and Testing Results Network2

In [16]:
# Adam optimizer for Network2; losses are logged through writer3 (runs/Network2).
optimizer2 = torch.optim.Adam(params=Network2Model.parameters(), lr=learning_rate)
train_model(
    model=Network2Model,
    criterion=criterion,
    optimizer=optimizer2,
    train_loader=train_loader,
    device=device,
    num_epochs=num_epochs,
    writer=writer3,
)

Epoch [1/20], Step [100/600], Loss: 0.9902
Epoch [1/20], Step [200/600], Loss: 0.5511
Epoch [1/20], Step [300/600], Loss: 0.3832
Epoch [1/20], Step [400/600], Loss: 0.8618
Epoch [1/20], Step [500/600], Loss: 0.3482
Epoch [1/20], Step [600/600], Loss: 0.7320
Epoch [2/20], Step [100/600], Loss: 0.5134
Epoch [2/20], Step [200/600], Loss: 0.3247
Epoch [2/20], Step [300/600], Loss: 0.4299
Epoch [2/20], Step [400/600], Loss: 0.3082
Epoch [2/20], Step [500/600], Loss: 0.2593
Epoch [2/20], Step [600/600], Loss: 0.3746
Epoch [3/20], Step [100/600], Loss: 0.4856
Epoch [3/20], Step [200/600], Loss: 0.3953
Epoch [3/20], Step [300/600], Loss: 0.3570
Epoch [3/20], Step [400/600], Loss: 0.3695
Epoch [3/20], Step [500/600], Loss: 0.3032
Epoch [3/20], Step [600/600], Loss: 0.3092
Epoch [4/20], Step [100/600], Loss: 0.3542
Epoch [4/20], Step [200/600], Loss: 0.3031
Epoch [4/20], Step [300/600], Loss: 0.3831
Epoch [4/20], Step [400/600], Loss: 0.4153
Epoch [4/20], Step [500/600], Loss: 0.2011
Epoch [4/20

In [18]:
# Evaluate Network2 on the test loader; the accuracy is logged at step num_epochs.
test_model(
    model=Network2Model,
    test_loader=test_loader,
    device=device,
    writer=writer3,
    epoch=num_epochs,
)

Accuracy of the network on the 10000 test images: 91.11 %


# Training and Testing Results Network3

In [19]:
# Adam optimizer for Network3; losses are logged through writer4 (runs/Network3).
optimizer3 = torch.optim.Adam(params=Network3Model.parameters(), lr=learning_rate)
train_model(
    model=Network3Model,
    criterion=criterion,
    optimizer=optimizer3,
    train_loader=train_loader,
    device=device,
    num_epochs=num_epochs,
    writer=writer4,
)

Epoch [1/20], Step [100/600], Loss: 0.9544
Epoch [1/20], Step [200/600], Loss: 0.6230
Epoch [1/20], Step [300/600], Loss: 0.4956
Epoch [1/20], Step [400/600], Loss: 0.3465
Epoch [1/20], Step [500/600], Loss: 0.4935
Epoch [1/20], Step [600/600], Loss: 0.3569
Epoch [2/20], Step [100/600], Loss: 0.2480
Epoch [2/20], Step [200/600], Loss: 0.2577
Epoch [2/20], Step [300/600], Loss: 0.3242
Epoch [2/20], Step [400/600], Loss: 0.2145
Epoch [2/20], Step [500/600], Loss: 0.3685
Epoch [2/20], Step [600/600], Loss: 0.3341
Epoch [3/20], Step [100/600], Loss: 0.1958
Epoch [3/20], Step [200/600], Loss: 0.4065
Epoch [3/20], Step [300/600], Loss: 0.2455
Epoch [3/20], Step [400/600], Loss: 0.3371
Epoch [3/20], Step [500/600], Loss: 0.1141
Epoch [3/20], Step [600/600], Loss: 0.2533
Epoch [4/20], Step [100/600], Loss: 0.3278
Epoch [4/20], Step [200/600], Loss: 0.2749
Epoch [4/20], Step [300/600], Loss: 0.3005
Epoch [4/20], Step [400/600], Loss: 0.1716
Epoch [4/20], Step [500/600], Loss: 0.1317
Epoch [4/20

In [20]:
# Evaluate Network3 on the test loader; the accuracy is logged at step num_epochs.
test_model(
    model=Network3Model,
    test_loader=test_loader,
    device=device,
    writer=writer4,
    epoch=num_epochs,
)

Accuracy of the network on the 10000 test images: 92.57 %


# **Conclusion**

In a comparative analysis of three neural network models designed for a classification task, it becomes apparent that the architecture complexity varies significantly across the models. The first model is a 4-layer network with intermediate layers composed of 20, 50, and 20 neurons, respectively, followed by an output layer with a softmax activation function. The second model expands upon this with a 6-layer configuration, including neuron counts of 10, 20, 30, 20, and 10 in the intermediate layers, also concluding with a softmax output. The third model mirrors this 6-layer approach but opts for a more complex neuron arrangement: 10, 40, 70, 40, and 10 neurons across its intermediate layers, leading to a softmax output.

After 20 training epochs with a learning rate of 0.01, the models yielded the following accuracies on a test set of 10,000 images: Network1 reached the highest accuracy of 95.83%, whereas Network2 and Network3 recorded lower accuracies of 91.11% and 92.57%, respectively. Based on these figures, Network1 emerges as the model with the lowest error on the test set.

The training time of each model was not measured; however, it is generally observed that networks with fewer parameters and less complexity tend to train faster. In this scenario, one might expect Network1 to train faster than the others because of its shallower 4-layer architecture, although it is worth noting that its wider first layer (784 → 20) actually gives it the most trainable parameters (17,980, versus 9,640 for Network2 and 14,520 for Network3). The deeper networks (Network2 and Network3) likely required more time for training, with Network3 potentially taking the longer of the two given its greater number of neurons, even though both have six layers.