In [None]:
#Moshe Hershkovitz 313123150
#Zachary Berrih 328595194

In [None]:
# for inline plots in jupyter
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random  as rnd
from sklearn.model_selection import train_test_split 
np.random.seed(42)  # fixing seed. Important for reproducibility!


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torchvision import models

In [None]:
def load_data(train_batch_size, test_batch_size, val_part, seed):
    """Build the STL10 training, validation and test data loaders.

    The STL10 ``train`` split is divided into two disjoint index sets: the
    training subset (augmented) and the validation subset (evaluation
    transform only). Without the split the model would be validated on the
    very images it is trained on.

    Args:
        train_batch_size: batch size of the training loader.
        test_batch_size: batch size of the validation and test loaders.
        val_part: fraction of the ``train`` split held out for validation;
            must satisfy ``0 < val_part < 1``.
        seed: seed of the generator that draws the train/validation split,
            so the split is reproducible.

    Returns:
        ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: if ``val_part`` is outside ``(0, 1)`` or the resulting
            split would leave one of the two subsets empty.
    """
    if not 0.0 < val_part < 1.0:
        raise ValueError("val_part must be in (0, 1), got {}".format(val_part))

    mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

    # Augmentations run on the raw image BEFORE ToTensor/Normalize: colour
    # jitter applied to an already normalised tensor would clamp it back to
    # [0, 1] and destroy the normalisation.
    # hue must lie in [0, 0.5]; the previous value of 2 made ColorJitter raise.
    # NOTE(review): brightness=4 / saturation=2 are kept from the original
    # and are very strong augmentations -- confirm they are intended.
    train_transform = transforms.Compose([transforms.RandomCrop((64, 64)),
                                          transforms.ColorJitter(brightness=4, saturation=2, hue=0.5),
                                          transforms.RandomHorizontalFlip(p=0.5),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean, std)])

    test_transform = transforms.Compose([transforms.CenterCrop((64, 64)),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean, std)])

    # Two views of the same 'train' split: one augmented (training) and one
    # with the evaluation transform (validation).
    train_full = torchvision.datasets.STL10('./data', split='train', download=True, transform=train_transform)
    val_full = torchvision.datasets.STL10('./data', split='train', download=True, transform=test_transform)
    test = torchvision.datasets.STL10('./data', split='test', download=True, transform=test_transform)

    # Reproducible, disjoint train/validation index sets.
    n_total = len(train_full)
    n_val = int(round(n_total * val_part))
    if n_val == 0 or n_val == n_total:
        raise ValueError("val_part={} leaves an empty train or validation set".format(val_part))
    split_rng = torch.Generator().manual_seed(seed)
    shuffled = torch.randperm(n_total, generator=split_rng).tolist()
    val_indices, train_indices = shuffled[:n_val], shuffled[n_val:]

    train = torch.utils.data.Subset(train_full, train_indices)
    val = torch.utils.data.Subset(val_full, val_indices)

    # num_workers is 1 here; raise it (e.g. to 4) on machines with more cores.
    train_loader = torch.utils.data.DataLoader(train, batch_size=train_batch_size, shuffle=True, num_workers=1)
    # Evaluation loaders do not need shuffling.
    val_loader = torch.utils.data.DataLoader(val, batch_size=test_batch_size, shuffle=False, num_workers=1)
    test_loader = torch.utils.data.DataLoader(test, batch_size=test_batch_size, num_workers=1)

    return train_loader, val_loader, test_loader

In [None]:
### Visualizing the data

In [None]:
def imshow(img, label):
    """Draw one image tensor on the current matplotlib axes.

    Args:
        img: tensor whose axis 0 is moved to the last position before
            plotting (presumably channels-first image data -- confirm
            against the caller).
        label: index into the notebook-level ``classes`` sequence; the
            matching class name is used as the plot title.

    ``plt.show()`` is not called here, so the caller decides when to render.
    """
    # Inverse of a normalisation with mean 0.5 and std 0.5.
    restored = img / 2 + 0.5
    # matplotlib wants the axis that was first to come last.
    pixels = np.transpose(restored.numpy(), (1, 2, 0))
    plt.title(classes[label])
    plt.imshow(pixels)

In [None]:
def visualization(classes):
    """Plot a grid with four random STL10 training images per class.

    One grid row is drawn for each entry of ``classes``; images are picked
    by rejection sampling from the ``train`` split until four images of the
    class have been found.

    Args:
        classes: sequence of class names; position ``i`` is treated as the
            name of dataset label ``i``.
    """
    images_per_class = 4
    # imshow() undoes the normalisation with `img / 2 + 0.5`, i.e. it assumes
    # mean = std = 0.5, so the same statistics are used here.
    mean, std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
    vis_transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize(mean, std)])
    trainset = torchvision.datasets.STL10(root='./data', split='train',
                                          download=True, transform=vis_transform)

    n_samples = len(trainset)
    n_classes = len(classes)
    plt.figure(figsize=(10, 3 * n_classes))
    for index, _ in enumerate(classes):
        counter = 0
        # NOTE(review): this loops forever if the dataset holds no image with
        # label `index`; fine as long as `classes` matches the dataset labels.
        while counter < images_per_class:
            random_index = np.random.randint(n_samples)
            images, label = trainset[random_index]
            if label == index:
                plt.subplot(n_classes, images_per_class, index * images_per_class + counter + 1)
                counter += 1
                imshow(torchvision.utils.make_grid(images), label)
    plt.show()


In [None]:
# `classes` must exist before this cell runs on a fresh kernel (imshow() reads
# it as a global as well). This is the same tuple the experiment cells further
# down assign.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')
visualization(classes)

In [None]:
### Logistic regression over flattened version of the images

In [None]:
class LogisticRegression(nn.Module):
    """Single linear layer applied to the flattened input.

    The layer takes 64 * 64 * 3 features per sample and produces 10 raw
    scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        n_inputs = 64 * 64 * 3
        n_outputs = 10
        self.flat = nn.Flatten(start_dim=1)
        self.fc = nn.Linear(n_inputs, n_outputs)

    def forward(self, x):
        """Flatten every sample of ``x`` and return its 10 scores."""
        return self.fc(self.flat(x))

In [None]:
### Fully-connected NN

In [None]:
class NN3Layers(nn.Module):
    """Fully connected network: three hidden layers plus a 10-way output layer.

    Every hidden layer is Linear -> BatchNorm1d -> activation -> Dropout(0.5),
    with widths 1500, 500 and 100. The first layer uses ReLU, the other two
    use Tanh. ``forward`` returns raw logits.

    Args:
        image_size: number of features per sample after flattening
            (default 12288 = 64 * 64 * 3).
    """

    def __init__(self, image_size=12288):
        super(NN3Layers, self).__init__()
        # first layer
        self.flat = nn.Flatten(start_dim=1)
        self.fc1 = nn.Linear(image_size, 1500, bias=True)
        self.BN1 = nn.BatchNorm1d(1500)
        self.D1 = nn.Dropout(p=0.5)
        self.ReLU = nn.ReLU()
        # second layer
        self.fc2 = nn.Linear(1500, 500, bias=True)
        # Must match fc2's 500 outputs; it was 200, which made forward() fail.
        self.BN2 = nn.BatchNorm1d(500)
        self.Tanh2 = nn.Tanh()
        self.D2 = nn.Dropout(p=0.5)
        # third layer
        self.fc3 = nn.Linear(500, 100, bias=True)
        self.BN3 = nn.BatchNorm1d(100)
        self.Tanh3 = nn.Tanh()
        self.D3 = nn.Dropout(p=0.5)
        # output layer
        self.fcend = nn.Linear(100, 10, bias=True)

    def forward(self, x):
        """Return the 10 class logits for every sample in ``x``."""
        x = self.flat(x)
        # first layer
        x = self.fc1(x)
        x = self.BN1(x)
        x = self.ReLU(x)
        # D1 was constructed but never applied; use it like D2/D3 so all
        # hidden layers are regularised the same way.
        x = self.D1(x)

        # second layer
        x = self.fc2(x)
        x = self.BN2(x)
        x = self.Tanh2(x)
        x = self.D2(x)

        # third layer
        x = self.fc3(x)
        x = self.BN3(x)
        x = self.Tanh3(x)
        x = self.D3(x)

        logits = self.fcend(x)
        return logits

In [None]:
### CNN

In [None]:
# NOTE: abandoned first draft of the CNN, kept for reference only.
# Only its `class` line had been commented out, which left the indented
# methods dangling and made this cell fail with an IndentationError on
# "Run All". The whole draft is commented out now; the notebook uses the
# CNN class defined in the following cell.
#
#class CNN(nn.Module):
#    def __init__(self):
#        super(CNN, self).__init__()
#        self.conv1 = nn.Conv2d(3, 6, 5)#50*50*6
#        self.BN1 = nn.BatchNorm1d(6)
#        self.pool = nn.MaxPool2d(2, 2)#25*25
#        self.conv2 = nn.Conv2d(6, 10, 2)#24*24*10
#        self.BN2 = nn.BatchNorm1d(10)
#        self.conv3 = nn.Conv2d(10, 32, 3)#22*22*32
#        self.BN3 = nn.BatchNorm2d(32)
#        self.dropout1 = nn.Dropout2d(0.25)
#        self.dropout2 = nn.Dropout2d(0.5)
#        self.fc1 = nn.Linear(64 * 7 * 7, 100)
#        self.d1 = nn.Dropout(p=0.2)
#        self.fc2 = nn.Linear(100, 10)
#        self.d2 = nn.Dropout(p=0.2)
#
#    def forward(self, x):
#        x = self.pool(F.relu(self.BN1(self.conv1(x))))
#        x = self.pool(F.relu(self.BN2(self.conv2(x))))
#        x = self.pool(F.relu(self.BN3(self.conv3(x))))
#        x = x.view(-1, 64 * 7 * 7)
#        x = self.d1(F.relu(self.fc1(x)))
#        x = self.d2(self.fc2(x))
#        return x

In [None]:
class CNN(nn.Module):
    """Small CNN: three conv -> batch-norm -> ReLU -> max-pool stages, then
    two linear layers producing 10 class logits.

    The first linear layer is sized for 3 x 64 x 64 inputs; the shape
    comments below follow such an input through the network
    (channels x height x width).
    """

    def __init__(self):
        super(CNN, self).__init__()         # input: 3 x 64 x 64
        self.conv1 = nn.Conv2d(3, 6, 5)     # 6 x 60 x 60
        self.bn1 = nn.BatchNorm2d(6)
        self.pool1 = nn.MaxPool2d(2, 2)     # 6 x 30 x 30
        self.conv2 = nn.Conv2d(6, 10, 2)    # 10 x 29 x 29
        self.bn2 = nn.BatchNorm2d(10)
        self.pool2 = nn.MaxPool2d(2, 2)     # 10 x 14 x 14 (29 / 2 rounds down)
        self.conv3 = nn.Conv2d(10, 64, 1)   # 64 x 14 x 14
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(2, 2)     # 64 x 7 x 7
        # The feature map is 64 x 7 x 7 (not 6 x 6), so fc1 takes 3136 inputs.
        self.fc1 = nn.Linear(64 * 7 * 7, 100)
        self.d1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(100, 10)
        self.d2 = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the 10 class logits for a batch of 3 x 64 x 64 images."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        # Flatten per sample. A hard-coded view(-1, C*H*W) silently mixes
        # samples (or crashes) whenever the assumed size is wrong.
        x = torch.flatten(x, 1)
        x = self.d1(F.relu(self.fc1(x)))
        # NOTE(review): d2 applies dropout to the output logits while
        # training; kept from the original -- confirm this is intended.
        x = self.d2(self.fc2(x))
        return x

In [None]:
### A fixed pre-trained MobileNetV2

In [None]:
class MobileNetV2(nn.Module):
    """Pre-trained MobileNetV2 used as a frozen feature extractor.

    Every parameter of the loaded network is frozen first; afterwards its
    final classifier layer is replaced by a new 200-unit linear layer, which
    is therefore trainable. Two further linear layers (200 -> 20 -> 10)
    produce the outputs, with dropout (p=0.2) after each stage.
    """

    def __init__(self):
        super().__init__()
        backbone = models.mobilenet.mobilenet_v2(pretrained=True)
        # Freeze the pre-trained weights before the new head is attached.
        backbone.requires_grad_(False)
        head_inputs = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_inputs, out_features=200)
        self.feature_extractor = backbone

        self.d1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(200, 20)
        self.d2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(20, 10)
        self.d3 = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the 10 outputs for every sample in ``x``."""
        features = F.relu(self.feature_extractor(x))
        hidden = self.d1(features)
        hidden = self.d2(F.relu(self.fc2(hidden)))
        # Dropout is also applied to the final outputs, as in the original.
        return self.d3(self.fc3(hidden))

In [None]:
class MobileNetV2_2(nn.Module):
    """Pre-trained MobileNetV2 that is fine-tuned end to end.

    Same layout as ``MobileNetV2`` but no parameter is frozen: the final
    classifier layer of the loaded network is replaced by a 200-unit linear
    layer, followed by linear layers 200 -> 20 -> 10 with dropout (p=0.2)
    after each stage.
    """

    def __init__(self):
        super().__init__()
        backbone = models.mobilenet.mobilenet_v2(pretrained=True)
        head_inputs = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_inputs, out_features=200)
        self.feature_extractor = backbone

        self.d1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(200, 20)
        self.d2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(20, 10)
        self.d3 = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the 10 outputs for every sample in ``x``."""
        features = F.relu(self.feature_extractor(x))
        hidden = self.d1(features)
        hidden = self.d2(F.relu(self.fc2(hidden)))
        # Dropout is also applied to the final outputs, as in the original.
        return self.d3(self.fc3(hidden))

In [None]:
### Running

In [None]:
def run(train_loader, val_loader, model, device, hyper_parameters):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        train_loader: iterable of ``(data, target)`` training batches.
        val_loader: iterable of validation batches, passed to ``test``.
        model: the ``nn.Module`` to train (updated in place).
        device: device the batches are moved to before the forward pass.
        hyper_parameters: tuple ``(epochs, lr, momentum, weight_decay,
            optimizer_type)``. ``optimizer_type`` is matched
            case-insensitively against ``'SGD'``, ``'Adam'`` and
            ``'RMSprop'``. ``momentum`` is used by SGD and RMSprop.

    Returns:
        ``(model, results)`` where ``results`` is the tuple
        ``(train_accuracy, train_losses, val_accuracy, val_losses)`` with one
        list entry per epoch.

    Raises:
        NotImplementedError: if ``optimizer_type`` is not a supported name.
    """
    epochs, lr, momentum, weight_decay, optimizer_type = hyper_parameters
    # loss function - cross entropy
    criterion = nn.CrossEntropyLoss()

    # Optimizer selection. Matching is case-insensitive so that both the
    # 'RMSprop' and the 'RMSProp' spelling are accepted.
    optimizer_key = str(optimizer_type).lower()
    if optimizer_key == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_key == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=weight_decay)
    elif optimizer_key == 'rmsprop':
        # The class is optim.RMSprop (lower-case 'p'); optim.RMSProp does not exist.
        optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08,
                                  weight_decay=weight_decay, momentum=momentum)
    else:
        # Fail here instead of hitting an undefined `optimizer` later on.
        raise NotImplementedError("optimizer not implemented: {!r}".format(optimizer_type))

    train_losses, train_accuracy, val_losses, val_accuracy = ([] for i in range(4))
    for epoch in range(1, epochs + 1):
        model.train()
        cumm_loss = 0
        num_batch = 0
        train_cor = 0.
        train_tot = 0.

        # iterate over the data
        for (data, target) in train_loader:
            num_batch += 1

            # move the batch to the target device (GPU when one is used)
            data, target = data.to(device), target.to(device)

            # Gradients accumulate across backward() calls by default, so
            # they have to be reset before every step.
            optimizer.zero_grad()

            # forward pass and loss
            logits = model(data)
            loss = criterion(logits, target)

            # backward pass and parameter update
            loss.backward()
            optimizer.step()

            cumm_loss += loss.item()
            _, predicted = torch.max(logits.data, 1)
            train_tot += target.size(0)
            train_cor += (predicted == target).sum().item()

        # per-epoch statistics: mean batch loss and accuracy on the training
        # data, then loss/accuracy on the validation data
        train_losses.append(cumm_loss / num_batch)
        train_accuracy.append(train_cor / train_tot)
        curr_val_accuracy, curr_val_loss = test(val_loader, model, device)
        val_accuracy.append(curr_val_accuracy)
        val_losses.append(curr_val_loss)
        print('epoch %d \tloss: %.3f\t  acc:%.3f\t val_acc:%.3f' %
            (epoch, cumm_loss / num_batch, train_cor / train_tot, curr_val_accuracy))
    results = train_accuracy, train_losses, val_accuracy, val_losses

    return model, results

In [None]:
# test function for validation and test
def test(test_loader, model, device):

    # will notify all your layers that you are in eval mode, that way, 
    # batchnorm or dropout layers will work in eval mode instead of training mode
    model.eval()
    
    test_loss = 0
    correct = 0
    num_batch = 0
    criterion = nn.CrossEntropyLoss()
    total = 0
    
    # torch.no_grad() impacts the autograd engine and deactivate it. 
    # It will reduce memory usage and speed up computations
    with torch.no_grad():
        for images, labels in test_loader:
            num_batch += 1
            images, labels= images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # calculate the correct prediction
            correct += (predicted == labels).sum().item()
            # calculate the loss
            loss = criterion(outputs, labels)
            test_loss += loss.item()

    return correct / total, test_loss / num_batch

In [None]:
def plot_loss(results, epochs, lr, momentum, weight_decay, model_type):
    """Plot per-epoch loss and accuracy curves for one training run.

    Losses go on the left y-axis and accuracies on a second y-axis sharing
    the same x-axis. After the figure is shown, the mean of the training
    accuracies is printed.

    Args:
        results: tuple ``(train_accuracy, train_losses, val_accuracy,
            val_losses)``; each sequence needs ``epochs`` entries.
        epochs, lr, momentum, weight_decay: values shown in the plot title
            (``epochs`` also sets the length of the x-axis).
        model_type: text used as the figure's main title.
    """
    train_accuracy, train_losses, val_accuracy, val_losses = results
    steps = np.arange(epochs)

    fig, loss_axis = plt.subplots()
    loss_axis.set_xlabel('epochs')
    loss_axis.set_ylabel('loss')
    loss_curves = ((train_losses, "train loss", 'red'),
                   (val_losses, "val loss", 'green'))
    for values, name, colour in loss_curves:
        loss_axis.plot(steps, values, label=name, color=colour)

    # Second y-axis on the same x-axis; the x-label is already set above.
    acc_axis = loss_axis.twinx()
    acc_axis.set_ylabel('accuracy')
    acc_curves = ((train_accuracy, "train acc", 'black'),
                  (val_accuracy, "val acc", 'blue'))
    for values, name, colour in acc_curves:
        acc_axis.plot(steps, values, label=name, color=colour)

    plt.suptitle(model_type, fontsize = 16)
    subtitle = 'epochs={}, learning rate={}, momentum={}, weight decay={}'.format(
        epochs, lr, momentum, weight_decay)
    plt.title(subtitle, fontsize = 10)

    fig.legend()
    fig.tight_layout()
    plt.show()

    print (" train accuracy is :" + str(np.mean(train_accuracy)))

In [None]:
# Experiment: train and evaluate the LogisticRegression model.
# Hyper-parameters and settings of this run.
batch_size = 64
test_batch_size = 64
epochs = 100
lr = 0.001 # 0.001 - LR. 0.1 - NN3, CNN, MobileNetV2.
cuda = False
seed = 42
optimizer_type = 'SGD' # SGD, Adam, RMSprop
momentum = 0.0
weight_decay = 1e-4
validation_part = 0.2
# Class names; imshow() indexes this tuple by label.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')

# Use the GPU only when it is both requested and available.
use_cuda = cuda and torch.cuda.is_available()

# Seed numpy and torch before the loaders and the model are created.
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Handle GPU stochasticity: cuDNN is enabled only together with CUDA, and
# its benchmark mode is off / deterministic mode is on.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if use_cuda else "cpu")

# Order must match the tuple unpacking at the top of run().
hyper_parameters = epochs, lr, momentum, weight_decay, optimizer_type

train_loader, val_loader, test_loader = load_data(batch_size, test_batch_size, validation_part, seed)


# model selection (exactly one line is active per experiment cell)
model = LogisticRegression().to(device)
#model = NN3Layers().to(device)
#model = CNN().to(device)
#model = MobileNetV2().to(device)
#model = MobileNetV2_2().to(device)

# train, then report the test accuracy and plot the training curves
model, results = run(train_loader, val_loader, model, device, hyper_parameters)
test_acc, _ = test(test_loader, model, device)
print("test acc:", test_acc)
plot_loss(results, epochs, lr, momentum, weight_decay, "LogisticRegression")


In [None]:
# Experiment: train and evaluate the NN3Layers model.
# Hyper-parameters and settings of this run.
batch_size = 32
test_batch_size = 32
epochs = 100
lr = 0.01 
cuda = False
seed = 42
optimizer_type = 'RMSprop' # SGD, Adam, RMSprop
momentum = 0.01
weight_decay = 1e-4
validation_part = 0.2
# Class names; imshow() indexes this tuple by label.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')

# Use the GPU only when it is both requested and available.
use_cuda = cuda and torch.cuda.is_available()

# Seed numpy and torch before the loaders and the model are created.
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Handle GPU stochasticity: cuDNN is enabled only together with CUDA, and
# its benchmark mode is off / deterministic mode is on.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if use_cuda else "cpu")

# Order must match the tuple unpacking at the top of run().
hyper_parameters = epochs, lr, momentum, weight_decay, optimizer_type

train_loader, val_loader, test_loader = load_data(batch_size, test_batch_size, validation_part, seed)


# model selection (exactly one line is active per experiment cell)
#model = LogisticRegression().to(device)
model = NN3Layers().to(device)
#model = CNN().to(device)
#model = MobileNetV2().to(device)
#model = MobileNetV2_2().to(device)

# train, then report the test accuracy and plot the training curves
model, results = run(train_loader, val_loader, model, device, hyper_parameters)
test_acc, _ = test(test_loader, model, device)
print("test acc:", test_acc)
plot_loss(results, epochs, lr, momentum, weight_decay, "NN3Layers")


In [None]:
# Experiment: train and evaluate the CNN model.
# Hyper-parameters and settings of this run.
batch_size = 64
test_batch_size = 64
epochs = 100
lr = 0.1 # 0.001 - LR. 0.1 - NN3, CNN, MobileNetV2.
cuda = False
seed = 42
optimizer_type = 'SGD' # SGD, Adam, RMSprop
momentum = 0.0
weight_decay = 1e-4
validation_part = 0.2
# Class names; imshow() indexes this tuple by label.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')

# Use the GPU only when it is both requested and available.
use_cuda = cuda and torch.cuda.is_available()

# Seed numpy and torch before the loaders and the model are created.
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Handle GPU stochasticity: cuDNN is enabled only together with CUDA, and
# its benchmark mode is off / deterministic mode is on.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if use_cuda else "cpu")

# Order must match the tuple unpacking at the top of run().
hyper_parameters = epochs, lr, momentum, weight_decay, optimizer_type

train_loader, val_loader, test_loader = load_data(batch_size, test_batch_size, validation_part, seed)


# model selection (exactly one line is active per experiment cell)
#model = LogisticRegression().to(device)
#model = NN3Layers().to(device)
model = CNN().to(device)
#model = MobileNetV2().to(device)
#model = MobileNetV2_2().to(device)

# train, then report the test accuracy and plot the training curves
model, results = run(train_loader, val_loader, model, device, hyper_parameters)
test_acc, _ = test(test_loader, model, device)
print("test acc:", test_acc)
plot_loss(results, epochs, lr, momentum, weight_decay, "CNN")


In [None]:
# Experiment: train and evaluate MobileNetV2 (pre-trained weights frozen).
# Hyper-parameters and settings of this run.
batch_size = 64
test_batch_size = 64
epochs = 100
lr = 0.1 # 0.001 - LR. 0.1 - NN3, CNN, MobileNetV2.
cuda = False
seed = 42
optimizer_type = 'SGD' # SGD, Adam, RMSprop
momentum = 0.0
weight_decay = 1e-4
validation_part = 0.2
# Class names; imshow() indexes this tuple by label.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')

# Use the GPU only when it is both requested and available.
use_cuda = cuda and torch.cuda.is_available()

# Seed numpy and torch before the loaders and the model are created.
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Handle GPU stochasticity: cuDNN is enabled only together with CUDA, and
# its benchmark mode is off / deterministic mode is on.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if use_cuda else "cpu")

# Order must match the tuple unpacking at the top of run().
hyper_parameters = epochs, lr, momentum, weight_decay, optimizer_type

train_loader, val_loader, test_loader = load_data(batch_size, test_batch_size, validation_part, seed)


# model selection (exactly one line is active per experiment cell)
#model = LogisticRegression().to(device)
#model = NN3Layers().to(device)
#model = CNN().to(device)
model = MobileNetV2().to(device)  
#model = MobileNetV2_2().to(device)

# train, then report the test accuracy and plot the training curves
model, results = run(train_loader, val_loader, model, device, hyper_parameters)
test_acc, _ = test(test_loader, model, device)
print("test acc:", test_acc)
plot_loss(results, epochs, lr, momentum, weight_decay, "MobileNetV2")


In [None]:
# Experiment: train and evaluate MobileNetV2_2 (pre-trained weights not frozen).
# Hyper-parameters and settings of this run.
batch_size = 64
test_batch_size = 64
epochs = 100
lr = 0.1 # 0.001 - LR. 0.1 - NN3, CNN, MobileNetV2.
cuda = False
seed = 42
optimizer_type = 'SGD' # SGD, Adam, RMSprop
momentum = 0.0
weight_decay = 1e-4
validation_part = 0.2
# Class names; imshow() indexes this tuple by label.
classes = ('Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck')

# Use the GPU only when it is both requested and available.
use_cuda = cuda and torch.cuda.is_available()

# Seed numpy and torch before the loaders and the model are created.
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Handle GPU stochasticity: cuDNN is enabled only together with CUDA, and
# its benchmark mode is off / deterministic mode is on.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if use_cuda else "cpu")

# Order must match the tuple unpacking at the top of run().
hyper_parameters = epochs, lr, momentum, weight_decay, optimizer_type

train_loader, val_loader, test_loader = load_data(batch_size, test_batch_size, validation_part, seed)


# model selection (exactly one line is active per experiment cell)
#model = LogisticRegression().to(device)
#model = NN3Layers().to(device)
#model = CNN().to(device)
#model = MobileNetV2().to(device)
model = MobileNetV2_2().to(device)

# train, then report the test accuracy and plot the training curves
model, results = run(train_loader, val_loader, model, device, hyper_parameters)
test_acc, _ = test(test_loader, model, device)
print("test acc:", test_acc)
plot_loss(results, epochs, lr, momentum, weight_decay, "MobileNetV2_2")
