In [2]:
#Testbed 2.3.4 LeakyReLU

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import time
import csv

  from .autonotebook import tqdm as notebook_tqdm


#prototype densenet
import torch
import torch.nn as nn
import torch.nn.functional as F

class DenseNet(nn.Module):
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=10):
        super(DenseNet, self).__init__()
        
        self.growth_rate = growth_rate
        self.block_config = block_config
        self.num_init_features = num_init_features
        self.bn_size = bn_size
        self.drop_rate = drop_rate
        self.num_classes = num_classes
        
        # First Convolutional layer
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=2, stride=2))
        ]))
        
        # Dense blocks
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = DenseBlock(num_layers=num_layers, num_input_features=num_features,
                               bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = Transition(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2
        
        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))
        
        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)
        
        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        features = self.features(x)


In [4]:
#adding L1 and L2 reg
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=1024, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=64)
        self.fc5 = nn.Linear(in_features=64, out_features=10)
    def forward(self, x):
        x = nn.functional.leaky_relu(self.conv1(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = nn.Dropout(p=0.1)
        x = nn.functional.leaky_relu(self.conv2(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = nn.Dropout(p=0.2)
        x = nn.functional.leaky_relu(self.conv3(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = nn.Dropout(p=0.3)
        x = nn.functional.leaky_relu(self.conv4(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = nn.Dropout(p=0.4)
        x = nn.functional.leaky_relu(self.conv5(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = nn.Dropout(p=0.5)
        x = nn.functional.leaky_relu(self.conv6(x), negative_slope=0.1)
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(x.size(0), -1)
        x = nn.functional.leaky_relu(self.fc1(x))
        x = nn.Dropout(p=0.5)
        x = nn.functional.leaky_relu(self.fc2(x))
        x = nn.functional.leaky_relu(self.fc3(x))
        x = nn.functional.leaky_relu(self.fc4(x))
        #x = nn.functional.softmax(self.fc3(x))
        x = self.fc5(x)
        return x

In [5]:
# Define the dataset and data loader
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(10),
                                transforms.RandomCrop(32, padding=4),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.2, 0.2, 0.2))])

In [6]:
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
# Define the loss function and optimizer
model = SimpleModel()
#loss function is used to measure the error between the predicted output of the model and the ground-truth label
criterion = nn.CrossEntropyLoss() #LF
optimizer1 = optim.Adam(model.parameters(), lr=0.001)
optimizer2 = optim.Adam(model.parameters(), lr=0.0005)
optimizer3 = optim.Adam(model.parameters(), lr=0.0002)
lr_ep = 50
lr_ep2 = 100

In [8]:
#Adjust learning rate after x epochs
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1) # this killed the NN

In [9]:
# Define the number of training epochs
num_epochs = 300
patience = 99 #set to 99 for testing as we want to see long term model performance 
counter = 0

In [10]:
#TEMP load in model
#model = SimpleModel()
#model.load_state_dict(torch.load('leaky_relu_model2.3.3.pt'))
#results showed loading in ReLU model did not produce great loss results - weights are not really transferrable between different loss function models?

In [11]:
# Train the model
valid_test = 99
epoch_test = 0
train_losses = []
test_losses = []
train_acc = []
test_acc = []
time_list = []
t1 = time.perf_counter()
reset_flag = 0
print('Begin model training')
print('Patience has been set at {} for {} epochs'.format(patience, num_epochs)) 
print('Learning rate set at {} decreasing to {} at epoch {}'.format(optimizer1.param_groups[0]["lr"], optimizer2.param_groups[0]["lr"], lr_ep))
for epoch in range(num_epochs):
    train_loss = 0
    test_loss = 0
    
    #Set learning rate
    if lr_ep > epoch: #added flag to decrease LR further once model is reset
        optimizer = optimizer1
    elif lr_ep > epoch and lr_ep2 > epoch: 
        optimizer = optimizer2
    else:
        optimizer = optimizer3
    

    
    #Training 
    correct_t = 0
    total_t = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs) #tensor produced, indecipherable when printed 
        loss = criterion(outputs, labels) #criterion is loss function
        loss.backward() #used to calculate the gradients of the parameters of a model with respect to a loss function
        optimizer.step() #updates the model parameters based on the gradients computed during the backward pass of training
                
        _, preds = torch.max(outputs, 1) #produces tensor containing indices of the maximum values (i.e. the predicted classes)
        correct_t += (preds == labels).sum().item()
        train_loss += loss.item()
        total_t += labels.size(0) #total equals 50000 by the end of this for loop for CIFAR10
        #correct_t += (preds == labels).sum().item()
        #print(preds==labels)
        #print()
    
    #Validation
    correct_v = 0
    total_v = 0
    for i, data in enumerate(test_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        correct_v += (preds == labels).sum().item()
        test_loss += loss.item()
        total_v += labels.size(0) #total equals 10000 by the end of this for loop for CIFAR10
        #add correct
   
    
    #defining accuracy and loss 
    t_acc = correct_t/total_t
    v_acc = correct_v/total_v
    new_train_lost = train_loss / len(train_loader)
    new_valid_lost = test_loss / len(test_loader)
    
    #Save trained model if it is improved 
    if new_valid_lost < valid_test:
        valid_test = new_valid_lost
        epoch_test = epoch+1
        torch.save(model.state_dict(), "leaky_relu_model2.3.4.pt")
        counter = 0
        print ('Temp model saved at Epoch {} with validation lost of {:.4f}'.format(epoch_test, valid_test))
    else:
        counter+=1
        print('No improvement in test, count is {}'.format(counter))
        
    #Terminates training after model stops improving based on patience 
    if counter > patience:
        break
    
    #prints stuff
    t2 = time.perf_counter()
    print('Training loss for Epoch {} is {:.4f} and Training accuracy is {:.2f}'.format(epoch + 1, new_train_lost, t_acc))
    print('Validation loss for Epoch {} is {:.4f} and Validation accuracy is {:.2f}'.format(epoch + 1, new_valid_lost, v_acc))
    print('Completed Epoch {} in {:.1f} seconds with LR of {}'.format(epoch + 1, t2-t1, optimizer.param_groups[0]["lr"]))
    #print('Epoch: {} Loss: {:.4f} Train_Acc: {:.4f}'.format(epoch, train_loss / len(dataloader), running_corrects.double() / len(dataset)))
    
    #makes list of loss, accuract, and time for epoch
    train_acc.append(t_acc)
    test_acc.append(v_acc)
    train_losses.append(new_train_lost)
    test_losses.append(new_valid_lost)
    time_list.append(t2-t1)

print ('Final model saved at Epoch {} with validation lost of {:.4f}'.format(epoch_test, valid_test))


Begin model training
Patience has been set at 99 for 300 epochs
Learning rate set at 0.001 decreasing to 0.0005 at epoch 50


TypeError: conv2d() received an invalid combination of arguments - got (Dropout, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: ([31;1mDropout[0m, [31;1mParameter[0m, [31;1mParameter[0m, [31;1mtuple[0m, [31;1mtuple[0m, [31;1mtuple[0m, [32;1mint[0m)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: ([31;1mDropout[0m, [31;1mParameter[0m, [31;1mParameter[0m, [31;1mtuple[0m, [31;1mtuple[0m, [31;1mtuple[0m, [32;1mint[0m)


In [None]:
# Write train and test lists to a CSV file
with open('leaky_relu_model2.3.4.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Training Loss', 'Test Loss', 'Train Accuracy', 'Test Accuracy', 'Time'])
    rows = zip(train_losses, test_losses, train_acc, test_acc, time_list)
    writer.writerows(rows)