In [None]:
import torch
import torch.nn as nn

class Net(nn.Module):
    """Small three-stage CNN classifier followed by a fully connected head.

    Each convolutional stage is ``Conv2d(3x3, padding=1) -> ReLU ->
    MaxPool2d(2, 2)``; the padding keeps the spatial size through the
    convolution and every pooling step halves it. The first linear layer
    expects ``4 * 4 * 32`` features, so the input height and width must be 32
    (32 -> 16 -> 8 -> 4 after the three pooling steps).

    Layer positions inside ``feature`` and ``classifier`` are kept stable so
    that the ``state_dict`` keys (``feature.0.weight``, ``classifier.4.bias``,
    ...) do not change.

    Args:
        num_classes: Number of output logits produced by the last linear
            layer. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 3.
    """

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()

        # Convolutional feature extractor: channels in_channels -> 8 -> 16 -> 32.
        self.feature = nn.Sequential(
            nn.Conv2d(in_channels, 8, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(8, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        # Fully connected head: 512 -> 100 -> 100 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(4*4*32, 100),
            nn.ReLU(),
            # nn.Dropout(0.5),
            nn.Linear(100, 100),
            nn.ReLU(),
            # nn.Dropout(0.5),
            nn.Linear(100, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised
            scores (no softmax is applied here).
        """
        x = self.feature(x)
        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        return self.classifier(x)

# net = Net().to('cuda')
# Build the model with its default constructor arguments; it is not moved to
# a GPU here (the CUDA variant above is disabled).
net = Net()
# Print the module tree so the layer layout can be checked at a glance.
print(net)

Net(
  (feature): Sequential(
    (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=512, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [None]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Preprocessing shared by both splits: convert each image to a tensor, then
# normalise every channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
batch_size = 100

# CIFAR-10 train and test splits, downloaded into ./data when missing.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training loader shuffles; the test loader keeps a fixed order.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.optim as optim
import torch.nn as nn

# Classification loss applied to the raw network outputs and integer labels.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of `net`.
optimizer = optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)

# Scale the learning rate by 0.1 once the value handed to scheduler.step()
# stops improving (patience of 3 epochs).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=3)

In [None]:
# Train `net` for EPOCHS epochs; after every epoch, evaluate it on the test set.
# trainset_losses / testset_losses collect one mean loss per epoch for plotting.
EPOCHS = 10
trainset_losses = []
testset_losses = []

for epoch in range(EPOCHS):
  # ---- training pass ----
  net.train()
  losses = []        # per-batch losses of this epoch
  running_loss = 0   # accumulator for the 100-batch progress print

  for i, batch in enumerate(trainloader):
    inputs, labels = batch
    # inputs, labels = inputs.to('cuda'), labels.to('cuda')
    optimizer.zero_grad()  # clear gradients left over from the previous step

    outputs = net(inputs)              # forward pass: predictions for this batch
    loss = criterion(outputs, labels)  # loss of the predictions against the true labels

    loss.backward()   # gradients of the loss w.r.t. the model parameters
    optimizer.step()  # parameter update from those gradients

    # Read the scalar once and reuse it for both accumulators.
    batch_loss = loss.item()
    losses.append(batch_loss)
    running_loss += batch_loss

    if i % 100 == 99:
      print(f'Loss [{epoch + 1} {i+1}] (epoch, minibatch): ', running_loss/100) #Average loss for 100 mini-batches
      running_loss = 0.0

  avg_loss = sum(losses) / len(losses) #Average loss over the whole epoch
  trainset_losses.append(avg_loss)

  # The scheduler monitors the average *training* loss of the epoch.
  scheduler.step(avg_loss)
  print(f'Average loss for epoch {epoch+1}: ', avg_loss)
  current_lr = optimizer.param_groups[0]['lr']
  print("Current learning rate: ", current_lr)

  # ---- evaluation pass on the test set ----
  net.eval()
  correct = 0
  total = 0
  test_loss = 0  # sum of the per-batch losses

  with torch.no_grad():
    for data in testloader:
      inputs, labels = data
      # inputs, labels = inputs.to('cuda'), labels.to('cuda')

      outputs = net(inputs)
      loss = criterion(outputs, labels)
      test_loss += loss.item()

      predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  print("total: ", total)
  print("test_loss: ", test_loss)
  # Each accumulated value is already a per-batch mean (the criterion's default
  # reduction), so average over the number of batches, not the number of
  # samples; this puts the test loss on the same scale as the training loss.
  test_loss /= len(testloader)
  testset_losses.append(test_loss)
  print(f'Test loss for epoch {epoch+1}: ', test_loss)
  # The expression must sit inside the f-string; outside it, the braces build a set.
  print(f"Accuracy on test set: {100 * correct / total:.2f}%")

print('Training Done')


Loss [1 100] (epoch, minibatch):  0.5479630723595619
Loss [1 200] (epoch, minibatch):  0.5686949703097344
Loss [1 300] (epoch, minibatch):  0.5961054879426956
Loss [1 400] (epoch, minibatch):  0.6061752808094024
Loss [1 500] (epoch, minibatch):  0.590137037038803
Average loss for epoch 1:  0.5818151696920395
Current learning rate:  0.01
total:  10000
test_loss:  95.05326396226883
Test loss for epoch 1:  0.009505326396226883
Accuracy on test set:  {69.43}
Loss [2 100] (epoch, minibatch):  0.520463450551033
Loss [2 200] (epoch, minibatch):  0.531249333024025
Loss [2 300] (epoch, minibatch):  0.5677753329277039
Loss [2 400] (epoch, minibatch):  0.5836708948016167
Loss [2 500] (epoch, minibatch):  0.5619190034270286
Average loss for epoch 2:  0.5530156029462814
Current learning rate:  0.01
total:  10000
test_loss:  95.58156442642212
Test loss for epoch 2:  0.009558156442642213
Accuracy on test set:  {69.25}
Loss [3 100] (epoch, minibatch):  0.4858446019887924
Loss [3 200] (epoch, minibatch

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Draw the per-epoch training and test losses as two labelled curves on one figure.
for series, legend_label in (
    (trainset_losses, "Training Set Losses"),
    (testset_losses, "Test Set Losses"),
):
    plt.plot(series, label=legend_label)

plt.title("Training and Test Losses")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

NameError: name 'trainset_losses' is not defined

In [None]:
# Final accuracy of `net` over the whole test set.
# Switch to evaluation mode explicitly: if the training cell was interrupted
# mid-epoch the model is still in train mode, which would matter as soon as
# dropout or batch-norm layers are part of the network.
net.eval()
correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for inference
    for inputs, labels in testloader:  # batched to bound memory use per forward pass
        # inputs, labels = inputs.to('cuda'), labels.to('cuda')

        outputs = net(inputs)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy over {total} images: {(correct / total)*100}%")


Accuracy over 10000 images: 68.67%


In [None]:
# Change the working directory to the Drive folder, so relative paths used
# afterwards (such as the "./..." checkpoint path) resolve inside it.
# NOTE(review): assumes Google Drive is already mounted at /content/drive — confirm.
%cd /content/drive/My Drive/Colab Notebooks/Deep Learning

/content/drive/My Drive/Colab Notebooks/Deep Learning


In [None]:
# Checkpoint location, relative to the current working directory.
model_path = "./YoshiNetV2_model_weights.pth"

# Store the model weights and the optimizer state together in a single file.
torch.save(
    dict(
        model_state_dict=net.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    ),
    model_path,
)

In [None]:
# Restore previously saved weights into the existing `net` instance.
# NOTE(review): this file name differs from the "./YoshiNetV2_model_weights.pth"
# written by the save cell — confirm it holds a checkpoint for this Net
# architecture, otherwise load_state_dict will fail on mismatched keys.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
checkpoint = torch.load(
    "/content/drive/My Drive/Colab Notebooks/Deep Learning/V2resnet50_model_weights.pth",
    map_location='cpu',  # place the stored tensors on the CPU; drop this when CUDA is available
)
saved_weights = checkpoint["model_state_dict"]
net.load_state_dict(saved_weights)

# Evaluation mode (the counterpart of net.train()): layers such as dropout and
# batch norm switch to their inference behaviour.
net.eval()