In [1]:
%matplotlib widget

In [2]:
from comet_ml import Experiment
experiment = Experiment(api_key="6tGmiuOfY08czs2b4SHaHI2hw",
                        project_name="multi-campaigns", workspace="vprzybylo")


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/vprzybylo/multi-campaigns/ee93461103f3482892b60d1167e9e06c



In [13]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'

import copy
import datetime
import itertools
from natsort import natsorted
import numpy as np
import time
import os
import pandas as pd
import random

import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader, Sampler
from torch.utils.data import Dataset
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch import optim
from torch.optim import lr_scheduler, Adam
from torchvision.utils import save_image
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix, classification_report

import PIL
from PIL import Image
from PIL import ImageFile
from pathlib import Path
ImageFile.LOAD_TRUNCATED_IMAGES = True

from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.models import LinearAxis, Range1d

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import seaborn as sns


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
plt_params = {'axes.labelsize': 'xx-large',
         'axes.titlesize':'xx-large',
         'xtick.labelsize':'x-large',
         'ytick.labelsize':'xx-large'}
plt.rcParams.update(plt_params)

In [15]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

### equal pull from classes

In [16]:
def make_weights_for_balanced_classes(train_imgs, nclasses):   
    #only weight the training dataset 
    
    class_sample_counts = [0] * nclasses                                                      
    for item in train_imgs:  
        class_sample_counts[item[1]] += 1      
    print('counts per class: ', class_sample_counts)
    
#     weight_per_class = [0.] * nclasses                                      
#     N = float(sum(class_sample_counts))                                                   
#     for i in range(nclasses): 
#         weight_per_class[i] = N/float(class_sample_counts[i])                                 
#     weight = [0] * len(images)                                              
#     for idx, val in enumerate(images):    
#         weight[idx] = weight_per_class[val[1]]  
        
    class_weights = 1./torch.Tensor(class_sample_counts)
    train_targets = [sample[1] for sample in train_imgs]
    train_samples_weights = [class_weights[class_id] for class_id in train_targets]

    return class_sample_counts, torch.DoubleTensor(train_samples_weights)

In [17]:
def make_histogram_classcounts(class_names, class_counts):
    fig, ax = plt.subplots(figsize=(8,5))    

    width = 0.75 # the width of the bars 
    ind = np.arange(len(class_counts))  # the x locations for the groups
    ax.barh(class_names, class_counts, width, color="blue", align='center', tick_label=class_names)
    #ax.set_yticks(ind+width/2)
    #plt.xticks(rotation=-90, ha='center')
    
    for i, v in enumerate(class_counts):
        ax.text(v, i-.1, str(v), color='blue')
    ax.set_xlabel("Count")
    #ax.set_xlim(0,2500)
    plt.savefig('../plots/class_counts.png', dpi=300, format='png', bbox_inches='tight')
    plt.show()

In [18]:
def load_split_train_val(class_names, datadir, batch_size, show_sample=True, num_workers=32, valid_size = .8):
    
    all_transforms = transforms.Compose([transforms.Resize(224),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    all_data_wpath = ImageFolderWithPaths(datadir,transform=all_transforms) #custom dataset that includes entire path
    
#     num_train = len(all_data_wpath)
#     indices = list(range(num_train))
#     split = int(np.floor(valid_size * num_train))
#     np.random.shuffle(indices)
#     train_idx, val_idx = indices[split:], indices[:split-1]
    
#     train_data = torch.utils.data.Subset(all_data_wpath, train_idx)
#     val_data = torch.utils.data.Subset(all_data_wpath, val_idx)
    
    train_length = int(valid_size*len(all_data_wpath))
    val_length = len(all_data_wpath)-train_length
    train_data, val_data = torch.utils.data.random_split(all_data_wpath,(train_length,val_length))
    #print(len(train_data), len(val_data))
    
    # For an unbalanced dataset we create a weighted sampler              
    class_counts, train_samples_weights = make_weights_for_balanced_classes(train_data.dataset.imgs, len(range(num_classes)))                                                                   
    make_histogram_classcounts(class_names, class_counts)
    
    train_sampler = torch.utils.data.sampler.WeightedRandomSampler(train_samples_weights, 
                                                                   len(train_samples_weights),
                                                                   replacement=True)                     
    trainloader = torch.utils.data.DataLoader(train_data.dataset, batch_size=batch_size,                         
                                            sampler = train_sampler, num_workers=num_workers, pin_memory=True)    
    
    val_sampler = SubsetRandomSampler(val_data.indices)                 
    valloader = torch.utils.data.DataLoader(val_data.dataset, batch_size=batch_size,                             
                                            sampler = val_sampler, num_workers=num_workers, pin_memory=True)  

#     val_samples_weights = make_weights_for_balanced_classes(val_data.dataset.imgs, len(range(num_classes)))                                                                   
    
#     val_sampler = torch.utils.data.sampler.WeightedRandomSampler(val_samples_weights, 
#                                                                    len(val_samples_weights),
#                                                                    replacement=True)                     
#     valloader = torch.utils.data.DataLoader(val_data.dataset, batch_size=batch_size,                              
#                                             sampler = val_sampler, num_workers=num_workers, pin_memory=True)    
    
    if show_sample:
        show_sample(train_data, train_sampler)
        
    torch.save(valloader, 'val_loader.pth')
            
    return trainloader, valloader

In [19]:
def show_sample(train_data, train_sampler):
    
    batch_size_sampler=20
    sample_loader = torch.utils.data.DataLoader(train_data.dataset, batch_size=batch_size_sampler, \
                                                sampler = train_sampler, num_workers=1, drop_last=True)

    data_iter = iter(sample_loader)

    images, labels, paths = data_iter.next()
    fig, ax = plt.subplots(batch_size_sampler//5, 5, figsize=(10, 8))

    for j in range(images.size()[0]):
        
        # Undo preprocessing
        image = images[j].permute(1, 2, 0).cpu().numpy()
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])

        image = std * image + mean

        # Image needs to be clipped between 0 and 1 or it looks like noise when displayed
        image = np.clip(image, 0, 1)
        ax = ax.flatten()
        ax[j].set_title(str(class_names[labels[j]]))
        ax[j].axis('off') 
        ax[j].imshow(image)
    plt.show()

In [20]:
def get_test_loader(datadir,
                    batch_size,
                    num_workers,
                    shuffle=True,
                    pin_memory=True):
    """
    Utility function for loading and returning a multi-process
    test iterator 
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - data_loader: test set iterator.
    """
    transforms_ = transforms.Compose([transforms.Resize(224),  #resizing helps memory usage
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    all_data_wpath = ImageFolderWithPaths(datadir,transform=transforms_)

    testloader = torch.utils.data.DataLoader(all_data_wpath,pin_memory=True,shuffle=shuffle,
                    batch_size=batch_size, num_workers=num_workers)  

    return testloader

In [21]:
# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
def set_parameter_requires_grad(model, feature_extract):
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False
            

In [22]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=False):

    # Initialize these variables which will be set in this if statement. Each of these
    # variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet18":
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224
        
    elif model_name == "resnet34":
        model_ft = models.resnet34(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "resnet152":
        model_ft = models.resnet152(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224
        
    elif model_name == "vgg16":
        """ VGG
        """
        model_ft = models.vgg16_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg19":
        """ VGG
        """
        model_ft = models.vgg19_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_1(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(7,7), stride=(2,2))
        #model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet169":
        """ Densenet
        """ 
        model_ft = models.densenet169(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224
        
    elif model_name == "densenet201":
        """ Densenet
        """ 
        model_ft = models.densenet201(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size

In [23]:
def tensorboard_logging(logger, loss, acc, step, model): 
    
    # 1. Log scalar values (scalar summary)
    info = { 'loss': loss, 'accuracy': acc}

    for tag, value in info.items():
        logger.scalar_summary(tag, value, step+1)

    # 2. Log values and gradients of the parameters (histogram summary)
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        logger.histo_summary(tag, value.data.cpu().numpy(), step+1)
        logger.histo_summary(tag+'/grad', value.grad.data.cpu().numpy(), step+1)

    # 3. Log training images (image summary)
    #         denormalize = transforms.Normalize((-1,), (1 / 0.5,))
    #         info = { 'images': demormalize(images)[:10].cpu().numpy() }

    #         for tag, images in info.items():
    #             logger.image_summary(tag, images, i+1)

In [24]:
def train_model(model_name, savename, dataloaders_dict, epochs, num_classes, experiment, is_inception, feature_extract=False):
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    #logger_train = Logger('./logs/'+current_time+'/train/')
    #logger_val = Logger('./logs/'+current_time+'/val/')
    model, input_size = initialize_model(model_name=model_name, num_classes=num_classes, feature_extract=feature_extract, use_pretrained=False)
    
    def set_dropout(model, drop_rate=0.1):
        for name, child in model.named_children():
            
            if isinstance(child, torch.nn.Dropout):
                child.p = drop_rate
            set_dropout(child, drop_rate=drop_rate)
    set_dropout(model, drop_rate=0.0)
    print(model)
    
#     model.classifier = nn.Sequential(*[model.classifier()[i] for i in range(7) if i != 2 and i !=5])
#     print(model.classifier())

    #feature extract False for all layers to be updated
   
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.is_available())
    # Send the model to GPU
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    # Gather the parameters to be optimized/updated in this run. If we are
    #  finetuning we will be updating all parameters. However, if we are
    #  doing feature extract method, we will only update the parameters
    #  that we have just initialized, i.e. the parameters with requires_grad
    #  is True.
    params_to_update = model.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        
        for name,param in model.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                #print("\t",name)
    #else:
        #for name,param in model.named_parameters():
            #if param.requires_grad == True:
                #print("\t",name)

    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # step_size: at how many multiples of epoch you decay
    # step_size = 1, after every 1 epoch, new_lr = lr*gamma 
    # step_size = 2, after every 2 epoch, new_lr = lr*gamma 
    # gamma = decaying factor
    #scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
    
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=0, verbose=True, eps=1e-04)
    print(scheduler)
    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss() #expects integer labels not one-hot encoded
     
    val_acc_history = []
    train_acc_history = []
    val_loss_history = []
    train_loss_history = []
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc_val = 0.0
    since_total = time.time()
    
    step = 0
    label_counts = [0]*len(range(num_classes))
    for epoch in range(epochs):
        since_epoch = time.time()
        #print('Epoch {}/{}'.format(epoch+1,num_epochs))
        print('-' * 20)
        
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            print('Phase: {}'.format(phase))
            totals_train = 0
            totals_val = 0
            running_loss_train = 0.0
            running_loss_val = 0.0
            running_corrects_train = 0
            running_corrects_val = 0
            
            if phase == 'train':
                model.train() 
                #logger = logger_train
                
            else:
                model.eval()   
                #logger = logger_val
            
            
            # Iterate over data.
            for i, (inputs, labels, paths) in enumerate(dataloaders_dict[phase]):
                for n in range(len(range(num_classes))):
                    label_counts[n] += len(np.where(labels.numpy() == n)[0])
                    
#                 for n in range(len(range(num_classes))):
#                     print("batch index {}, {} counts: {}".format(
#                         i, n, (labels == n).sum()))

                
#                print('LABEL COUNT = ', label_counts)

                inputs = inputs.to(device)
                labels = labels.to(device)
                #print(inputs.device)
                
                # zero the parameter gradients
                optimizer.zero_grad() # a clean up step for PyTorch

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # makes sure to clear the intermediate values for evaluation
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward() # compute updates for each parameter
                        optimizer.step() # make the updates for each parameter                        

                
                if phase == 'train':
                    #Batch accuracy and loss statistics   
                    batch_loss_train = loss.item() * inputs.size(0)     
                    batch_corrects_train = torch.sum(preds == labels.data) 
                    #tensorboard_logging(logger, batch_loss_train, labels, batch_corrects_train, step, model)
                    
                    #for accuracy and loss statistics overall 
                    running_loss_train += loss.item() * inputs.size(0)
                    running_corrects_train += torch.sum(preds == labels.data)
                    totals_train += labels.size(0)
                    
                    if (i+1) % 5 == 0:
                        print("Training, Batch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(i+1,\
                                                                      len(dataloaders_dict[phase]), \
                                                                      batch_loss_train/labels.size(0), \
                                                                      float(batch_corrects_train)/labels.size(0)))

                    step += 1
                    
                else:
                    #Batch accuracy and loss statistics  
                    batch_loss_val = loss.item() * inputs.size(0)     
                    batch_corrects_val = torch.sum(preds == labels.data) 
                    
                    
                    #for accuracy and loss statistics overall
                    running_loss_val += loss.item() * inputs.size(0)
                    running_corrects_val += torch.sum(preds == labels.data)
                    totals_val += labels.size(0)
                    
                    if (i+1) % 3 == 0:
                        print("Validation, Batch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(i+1,\
                                                                      len(dataloaders_dict[phase]), \
                                                                      batch_loss_val/labels.size(0), \
                                                                      float(batch_corrects_val)/labels.size(0)))

            if phase == 'train':
                #epoch loss and accuracy stats    
                epoch_loss_train = running_loss_train / totals_train
                epoch_acc_train = running_corrects_train.double() / totals_train
                scheduler.step(epoch_acc_train) #reduce learning rate if not improving acc
                experiment.log_metric('train scheduler', scheduler)

                #with open('save_acc_loss_train_e50_bs128.csv', 'w', newline='') as file:
                #    writer = csv.writer(file)
                #    writer.writerow([model_name, epoch, epoch_acc_train, epoch_loss_train])

                print("Training Epoch {}/{}, Loss: {:.3f}, Accuracy: \033[1m {:.3f} \033[0m".format(epoch+1,epochs, epoch_loss_train, epoch_acc_train))
                #tensorboard_logging(logger, epoch_loss_train, epoch_acc_train, epoch, model)
                train_acc_history.append(epoch_acc_train)
                train_loss_history.append(epoch_loss_train)
                experiment.log_metric('epoch_acc_train', epoch_acc_train*100)
                experiment.log_metric('epoch_loss_train', epoch_loss_train)

            else: 
                epoch_loss_val = running_loss_val / totals_val
                epoch_acc_val = running_corrects_val.double() / totals_val
                scheduler.step(epoch_acc_val) #reduce learning rate if not improving acc
                experiment.log_metric('val scheduler', scheduler)
                
                #with open('save_acc_loss_val_e50_bs128.csv', 'w', newline='') as file:
                #    writer = csv.writer(file)
                #    writer.writerow([model_name, epoch, epoch_acc_val, epoch_loss_val])

                print("Validation Epoch {}/{}, Loss: {:.3f}, Accuracy: \033[1m {:.3f} \033[0m".format(epoch+1,epochs, epoch_loss_val, epoch_acc_val))
                #tensorboard_logging(logger, epoch_loss_val, epoch_acc_val, epoch, model)
                val_acc_history.append(epoch_acc_val)
                val_loss_history.append(epoch_loss_val)
                experiment.log_metric('epoch_acc_val', epoch_acc_val*100)
                experiment.log_metric('epoch_loss_val', epoch_loss_val)
                
                #deep copy the model
                if epoch_acc_val > best_acc_val:
                    best_acc_val = epoch_acc_val
                    best_model_wts = copy.deepcopy(model.state_dict())
                    # save/load best model weights
                    if savename is not None:
                        torch.save(model, savename+'_'+model_name)

        time_elapsed = time.time() - since_epoch
        print('Epoch complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    time_elapsed = time.time() - since_total
    print('All epochs comlete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    #with open('save_model_timing.csv', 'w', newline='') as file:
        #writer = csv.writer(file)
        #writer.writerow([model_name, time_elapsed])

    return model, train_acc_history, val_acc_history, train_loss_history, val_loss_history

# MAIN

In [25]:
def main():
    #file = 'batch_size'
    #writer = csv.writer(file)
    for batch_size in params['batch_size']:
        
        print('NEW BATCH SIZE: ', batch_size) 
        train_loader, val_loader = load_split_train_val(
            class_names=params['class_names'], 
            datadir=params['data_dir'],
            batch_size=batch_size,
            show_sample=False,
            num_workers=num_workers)

        dataloaders_dict = {'train': train_loader, 'val': val_loader}

        model_train_accs = []
        model_val_accs = []  
        model_train_loss = []
        model_val_loss = []
        for model_name in params['model_names']: 
            for epochs in params['max_epochs']:
                model_ft, train_acc_history, val_acc_history, train_loss_history, val_loss_history= train_model(
                    model_name,
                    params['savename'],
                    dataloaders_dict,
                    epochs, 
                    num_classes,
                    experiment,
                    is_inception=False
                )
                
                model_val_accs.append(val_acc_history)
                model_train_accs.append(train_acc_history)
                model_train_loss.append(train_loss_history)
                model_val_loss.append(val_loss_history)
                
                
    return model_name, model_train_accs, model_val_accs, model_train_loss, model_val_loss, train_loader, val_loader

In [27]:
if __name__ == '__main__':
        
    params = {'lr': [0.01],
        'batch_size': [128, 256, 512, 1024],
        'max_epochs': [20],
        'data_dir':'../cpi_data/training_datasets/hand_labeled_resized_multcampaigns_clean/',
        'optimizer':[torch.optim.Adam, torch.optim.Adagrad, torch.optim.Adadelta, torch.optim.Adamax],
        #'momentum': [0.9, 0.999], 
        'class_names':['aggregates','blank','blurry','budding','bullets','columns','compact irregulars',\
                       'fragments','needles','plates','rimed aggregates','rimed columns','spheres'],
        'model_names':['vgg19'],
        #'model_names':['resnet18', 'resnet34', 'resnet152', 'alexnet', 'vgg16', 'vgg19', 'densenet169', 'densenet201'],
        #'savename': '../saved_models/bs128_e50_13classes_clean'}
        'savename': None}

    experiment.log_parameters(params)
    experiment.log_code = True
    #experiment.add_tag('inlcudes bad data')
    #experiment.add_tag(' labels')
    num_workers = 0  #change to # of cores available to load images
    num_classes = len(params['class_names'])
    experiment.display()
    model_name, model_train_accs, model_val_accs, model_train_loss, model_val_loss, train_loader, val_loader = main()
   

NEW BATCH SIZE:  128
counts per class:  [2393, 2073, 3050, 778, 767, 1850, 1572, 1676, 481, 612, 1896, 1342, 1226]


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

KeyboardInterrupt: 

In [None]:
#ACCURACY PLOT for training and validation
fig = plt.figure(figsize=(12,5))
plt.subplot(1, 2, 1)
plt.plot(np.arange(1,(params['max_epochs'][0]+1)),[i.cpu().numpy()*100 for i in model_train_accs[0]], label='train')
plt.plot(np.arange(1,(params['max_epochs'][0]+1)),[i.cpu().numpy()*100 for i in model_val_accs[0]], label='validation')
 
plt.legend()
plt.xticks(np.arange(1, (params['max_epochs'][0]+1), 10.0))
plt.xlabel("Epoch")
plt.ylabel("Accuracy [%]")


#LOSS PLOT for training and validation
plt.subplot(1, 2, 2)
plt.plot(np.arange(1,(params['max_epochs'][0]+1)),[i for i in model_train_loss[0]], label='train')
plt.plot(np.arange(1,(params['max_epochs'][0]+1)),[i for i in model_val_loss[0]], label='validation')
plt.legend()
plt.xticks(np.arange(1, (params['max_epochs'][0]+1), 10.0))
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

# Make Confusion Matrix - Validation

In [None]:
model = torch.load('../saved_models/vgg19_bs128_e20_13classes_clean').cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Send the model to GPU
# if torch.cuda.device_count() > 1:
#     print("Let's use", torch.cuda.device_count(), "GPUs!")
#     model = nn.DataParallel(model)

all_preds= []
all_labels = []
with torch.no_grad():

    for batch_idx, (imgs, labels, img_paths) in enumerate(val_loader):
        # get the inputs
        inputs = imgs.to(device)
        labels = labels.to(device)

        output = model(inputs)
        pred = torch.argmax(output, 1)

        all_preds.append(pred.cpu().numpy())
        all_labels.append(labels.cpu().numpy())


In [None]:
#NORMALIZED

cm = confusion_matrix(np.asarray(list(itertools.chain(*all_preds))), np.asarray(list(itertools.chain(*all_labels))))
cmn = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(13,9))

heat = sns.heatmap(cmn, annot=True, fmt='.2f', xticklabels=params['class_names'], yticklabels=params['class_names'], cmap="Blues")
heat.set_xticklabels(heat.get_xticklabels(), rotation=90, fontsize=18)
heat.set_yticklabels(heat.get_xticklabels(), rotation=0, fontsize=18)


plt.ylabel('Actual Labels', fontsize=20)
plt.xlabel('Predicted Labels', fontsize=20);
plt.savefig('../plots/norm_conf_matrix.pdf', dpi=300, bbox_inches='tight')

In [None]:
cm = confusion_matrix(np.asarray(list(itertools.chain(*all_preds))), np.asarray(list(itertools.chain(*all_labels))))
fig, ax = plt.subplots(figsize=(10,8))

heat = sns.heatmap(cm, annot=True, fmt='.2f', xticklabels=params['class_names'], yticklabels=params['class_names'], cmap="Blues")
heat.set_xticklabels(heat.get_xticklabels(), rotation=90)
plt.ylabel('Actual Labels', fontsize=20)
plt.xlabel('Predicted Labels', fontsize=20);

In [None]:
#Metrics classification report
classification_report(all_labels, all_preds, digits=3)

In [None]:
#transfer learning method
fig = plt.figure(figsize=(12,5))
num_epochs = 50
ax1 = plt.subplot(1, 2, 1)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
colors = ['lightblue', 'blue','darkblue','gold','red', 'darkred', 'lightgreen', 'darkgreen']
for i, (model, train_accs, val_accs) in enumerate(zip(params['model_names'], model_train_accs, model_val_accs)):
    ax1.scatter(np.arange(1,(num_epochs+1)), [i.cpu().numpy()*100 for i in train_accs[:num_epochs]], c=colors[i], marker='*')
    ax1.scatter(np.arange(1,(num_epochs+1)), [i.cpu().numpy()*100 for i in val_accs[:num_epochs]], c=colors[i], marker='o', label=str(model))
plt.ylim(20,100)
plt.xlim(1,num_epochs)
ax1.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.xaxis.set_tick_params(which='minor', bottom=False)

ax2 = plt.subplot(1, 2, 2)
plt.xlabel("Epochs")
plt.ylabel("Loss")
for i, (model,train_loss, val_loss) in enumerate(zip(params['model_names'], model_train_loss, model_val_loss)):
    ax2.scatter(np.arange(1,(num_epochs+1)), [i for i in train_loss[:num_epochs]], c=colors[i], marker='*')
    ax2.scatter(np.arange(1,(num_epochs+1)), [i for i in val_loss[:num_epochs]], c=colors[i], marker='o', label=str(model))
ax2.legend(title='Model type:', loc='right', prop={'size': 10})
plt.ylim(0,2.4)
plt.xlim(1,num_epochs)
plt.xticks(np.arange(1, num_epochs+1, 2))
plt.tight_layout()
ax2.yaxis.set_ticks_position('both')
ax2.minorticks_on()
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)

# fig.savefig('cpi_data/OLYMPEX/plots/loss_acc_allmodels_reducelr_all_512_0dp.eps')
# fig.savefig('cpi_data/OLYMPEX/plots/loss_acc_allmodels_reducelr_all_512_0dp.png')

In [None]:
#transfer learning method
fig = plt.figure(figsize=(12,5))
num_epochs = 50
ax1 = plt.subplot(1, 2, 1)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
colors = ['lightblue', 'blue','darkblue','gold','red', 'darkred', 'lightgreen', 'darkgreen']
for i, (model, train_accs, val_accs) in enumerate(zip(params['model_names'], model_train_accs, model_val_accs)):
    ax1.scatter(np.arange(1,(num_epochs+1)), [i.cpu().numpy()*100 for i in train_accs[:num_epochs]], c=colors[i], marker='*')
    ax1.scatter(np.arange(1,(num_epochs+1)), [i.cpu().numpy()*100 for i in val_accs[:num_epochs]], c=colors[i], marker='o', label=str(model))
plt.ylim(40,100)
plt.xlim(1,num_epochs)
ax1.legend(title='Model type:', loc='lower right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.xaxis.set_tick_params(which='minor', bottom=False)

ax2 = plt.subplot(1, 2, 2)
plt.xlabel("Epochs")
plt.ylabel("Loss")
for i, (model,train_loss, val_loss) in enumerate(zip(params['model_names'], model_train_loss, model_val_loss)):
    ax2.scatter(np.arange(1,(num_epochs+1)), [i for i in train_loss[:num_epochs]], c=colors[i], marker='*')
    ax2.scatter(np.arange(1,(num_epochs+1)), [i for i in val_loss[:num_epochs]], c=colors[i], marker='o', label=str(model))
ax2.legend(title='Model type:', loc='upper right', prop={'size': 10})
plt.ylim(0,2.4)
plt.xlim(1,num_epochs)
plt.xticks(np.arange(1, num_epochs+1, 2))
plt.tight_layout()
ax2.yaxis.set_ticks_position('both')
ax2.minorticks_on()
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)

#fig.savefig('../plots/loss_acc_allmodels_bs_128_e20_13classes.eps', dpi=300)
#fig.savefig('../plots/loss_acc_allmodels_bs_128_e20_13classes.pdf', dpi=300)

In [None]:
#fig = plt.figure(figsize=(12,5))
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15,10))

num_epochs = 20
ax1 = plt.subplot(2, 2, 1)

plt.xlabel("Epochs")
plt.ylabel("Train Accuracy")
colors = ['darkred', 'red', 'salmon', 'lightsalmon','bisque', 'lightgreen', 'darkgreen', 'lightblue']  
color_key = {'vgg19': 'darkred', 'vgg16': 'red', 'resnet34':'salmon', 'resnet18':'lightsalmon', 'resnet152':'bisque',
            'densenet201':'lightgreen', 'densenet169':'darkgreen', 'alexnet':'lightblue'}

for epoch in range(num_epochs):
    train_acc = [i.cpu().numpy()*100 for i in np.array(model_train_accs)[:,epoch]]
    model_names, train_acc, colors_sorted = (list(x) for x in zip(*sorted(zip(params['model_names'], train_acc, colors), reverse=True)))
    colors_sorted = ['darkred', 'red', 'salmon', 'lightsalmon','bisque', 'lightgreen', 'darkgreen', 'lightblue']  
    if epoch == params['max_epochs'][0]-1: 
        for m, model_name in enumerate(model_names):
            p1 = ax1.bar(epoch+1, train_acc[m], color=colors_sorted[m], label=model_name)
            
    else:
        p1 = ax1.bar(epoch+1, train_acc, color=colors_sorted)
        
plt.ylim(0,100)
plt.xlim(0,num_epochs+1)
ax1.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.xaxis.set_tick_params(which='minor', bottom=False)

ax2 = plt.subplot(2, 2, 2)
plt.xlabel("Epochs")
plt.ylabel("Train Loss")
for epoch in range(num_epochs):
    train_loss = [i for i in np.array(model_train_loss)[:,epoch]]
    train_loss, model_names = (list(x) for x in zip(*sorted(zip(train_loss, params['model_names']), reverse=True)))
    
    if epoch == params['max_epochs'][0]-1: 
    #if epoch == 0:
        for m, model_name in enumerate(model_names):
            p1 = ax2.bar(epoch+1, train_loss[m], color=color_key[model_name], label=model_name)
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax2.bar(epoch+1, train_loss[m], color=color_key[model_name])

plt.xlim(0,num_epochs+1)
#plt.ylim(0,2)
ax2.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax2.yaxis.set_ticks_position('both')
ax2.minorticks_on()
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)

ax3 = plt.subplot(2, 2, 3)

plt.xlabel("Epochs")
plt.ylabel("Validation Accuracy")
colors_sorted = ['darkred', 'red', 'salmon', 'lightsalmon','bisque', 'lightgreen', 'darkgreen', 'lightblue']  

#colors = plt.cm.rainbow(np.linspace(0,1,9))
for epoch in range(num_epochs):
    val_acc = [i.cpu().numpy()*100 for i in np.array(model_val_accs)[:,epoch]]
    model_names, val_acc, colors_sorted = (list(x) for x in zip(*sorted(zip(params['model_names'], val_acc, colors), reverse=True)))

    if epoch == params['max_epochs'][0]-1: 
        for m, model_name in enumerate(model_names):
            p1 = ax3.bar(epoch+1, val_acc[m], color=color_key[model_name], label=model_name)
            
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax3.bar(epoch+1, val_acc[m], color=color_key[model_name])
        
plt.ylim(0,100)
plt.xlim(0,num_epochs+1)
ax3.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax3.yaxis.set_ticks_position('both')
ax3.minorticks_on()
ax3.tick_params(axis='y', which='minor', direction='out')
ax3.xaxis.set_tick_params(which='minor', bottom=False)

ax4 = plt.subplot(2, 2, 4)
plt.xlabel("Epochs")
plt.ylabel("Validation Loss")
for epoch in range(num_epochs):
    val_loss = [i for i in np.array(model_val_loss)[:,epoch]]
    val_loss, model_names, colors_sorted = (list(x) for x in zip(*sorted(zip(val_loss, params['model_names'], colors), reverse=True)))
    
    if epoch == params['max_epochs'][0]-1: 
    #if epoch == 0:
        for m, model_name in enumerate(model_names):
            p1 = ax4.bar(epoch+1, val_loss[m], color=color_key[model_name], label=model_name)
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax4.bar(epoch+1, val_loss[m], color=color_key[model_name])

plt.xlim(0,num_epochs+1)
plt.ylim(0,2)
ax4.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax4.yaxis.set_ticks_position('both')
ax4.minorticks_on()
ax4.tick_params(axis='y', which='minor', direction='out')
ax4.xaxis.set_tick_params(which='minor', bottom=False)

In [None]:
#fig = plt.figure(figsize=(12,5))
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15,10))

num_epochs = 20
ax1 = plt.subplot(2, 2, 1)

plt.xlabel("Epochs")
plt.ylabel("Train Accuracy")
colors = ['darkred', 'red', 'salmon', 'lightsalmon','bisque', 'lightgreen', 'darkgreen', 'lightblue']  
color_key = {'vgg19': 'darkred', 'vgg16': 'red', 'resnet34':'salmon', 'resnet18':'lightsalmon', 'resnet152':'bisque',
            'densenet201':'lightgreen', 'densenet169':'darkgreen', 'alexnet':'lightblue'}

for epoch in range(num_epochs):
    train_acc = [i.cpu().numpy()*100 for i in np.array(model_train_accs)[:,epoch]]
    train_acc, model_names = (list(x) for x in zip(*sorted(zip(train_acc, params['model_names']), reverse=True)))
    #colors_sorted = ['darkred', 'red', 'salmon', 'lightsalmon','bisque', 'lightgreen', 'darkgreen', 'lightblue']     
            
    if epoch == params['max_epochs'][0]-1: 
        for m, model_name in enumerate(model_names):
            print(color_key[model_name], train_acc[m], model_name)
            p1 = ax1.bar(epoch+1, train_acc[m], color=color_key[model_name], label=model_name)
            
    else:
        p1 = ax1.bar(epoch+1, train_acc, color=colors_sorted)
        
plt.ylim(30,100)
plt.xlim(0,num_epochs+1)
ax1.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.xaxis.set_tick_params(which='minor', bottom=False)

ax2 = plt.subplot(2, 2, 2)
plt.xlabel("Epochs")
plt.ylabel("Train Loss")
for epoch in range(num_epochs):
    train_loss = [i for i in np.array(model_train_loss)[:,epoch]]
    train_loss, model_names = (list(x) for x in zip(*sorted(zip(train_loss, params['model_names']), reverse=True)))
    
    if epoch == params['max_epochs'][0]-1: 
    #if epoch == 0:
        for m, model_name in enumerate(model_names):
            p1 = ax2.bar(epoch+1, train_loss[m], color=color_key[model_name], label=model_name)
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax2.bar(epoch+1, train_loss[m], color=color_key[model_name])

plt.xlim(0,num_epochs+1)
#plt.ylim(0,2)
ax2.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax2.yaxis.set_ticks_position('both')
ax2.minorticks_on()
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)

ax3 = plt.subplot(2, 2, 3)

plt.xlabel("Epochs")
plt.ylabel("Validation Accuracy")
colors_sorted = ['darkred', 'red', 'salmon', \
                 'lightsalmon','bisque', 'lightgreen', \
                 'darkgreen', 'lightblue']  

#colors = plt.cm.rainbow(np.linspace(0,1,9))
for epoch in range(num_epochs):
    val_acc = [i.cpu().numpy()*100 for i in np.array(model_val_accs)[:,epoch]]
    val_acc, model_names = (list(x) for x in zip(*sorted(zip(val_acc, params['model_names']), reverse=True)))

    if epoch == params['max_epochs'][0]-1: 
        for m, model_name in enumerate(model_names):
            print(color_key[model_name], val_acc[m], model_name)
            p1 = ax3.bar(epoch+1, val_acc[m], color=color_key[model_name], label=model_name)
            
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax3.bar(epoch+1, val_acc[m], color=color_key[model_name])
        
plt.ylim(30,100)
plt.xlim(0,num_epochs+1)
ax3.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax3.yaxis.set_ticks_position('both')
ax3.minorticks_on()
ax3.tick_params(axis='y', which='minor', direction='out')
ax3.xaxis.set_tick_params(which='minor', bottom=False)

ax4 = plt.subplot(2, 2, 4)
plt.xlabel("Epochs")
plt.ylabel("Validation Loss")
for epoch in range(num_epochs):
    val_loss = [i for i in np.array(model_val_loss)[:,epoch]]
    val_loss, model_names= (list(x) for x in zip(*sorted(zip(val_loss, params['model_names']), reverse=True)))
    
    if epoch == params['max_epochs'][0]-1: 
    #if epoch == 0:
        for m, model_name in enumerate(model_names):
            p1 = ax4.bar(epoch+1, val_loss[m], color=color_key[model_name], label=model_name)
    else:
        for m, model_name in enumerate(model_names):
            p1 = ax4.bar(epoch+1, val_loss[m], color=color_key[model_name])

plt.xlim(0,num_epochs+1)
plt.ylim(0,2)
ax4.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax4.yaxis.set_ticks_position('both')
ax4.minorticks_on()
ax4.tick_params(axis='y', which='minor', direction='out')
ax4.xaxis.set_tick_params(which='minor', bottom=False)

In [None]:
train_accs = np.array(model_train_accs).transpose(1,0)*100
df_train = pd.DataFrame(train_accs, index=epochs, columns=params['model_names'], dtype = np.float64)
val_accs = np.array(model_val_accs).transpose(1,0)*100
df_val = pd.DataFrame(val_accs, index=epochs, columns=params['model_names'], dtype = np.float64)

In [None]:
df_train.plot(kind='bar', colormap='rainbow', stacked=False, figsize=(12,5), ylim=[20,120], xlim=[0,29]).legend(
    loc='upper center', ncol=4, title="Model Name")
plt.axhline(y=100, color='k', linestyle='--', lw=1)
plt.xlabel('Epochs')
plt.ylabel('Training Accuracy')

df_val.plot(kind='bar', colormap='rainbow', stacked=False, figsize=(12,5), ylim=[20,120], xlim=[0,29]).legend(
    loc='upper center', ncol=4, title="Model Name")
plt.axhline(y=100, color='k', linestyle='--', lw=1)
plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy')


In [None]:
fig = plt.figure(figsize=(12,5))
num_epochs = 10
ax1 = plt.subplot(1, 2, 1)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
colors = ['lightblue', 'blue', 'darkblue', 'gold','red', 'darkred', 'lightgreen', 'darkgreen']  
width = 0.35
for epoch in range(num_epochs):
    val_acc = [i.cpu().numpy()*100 for i in np.array(model_val_accs)[:,epoch]]
    val_acc, model_names, colors_sorted = (list(x) for x in zip(*sorted(zip(val_acc, params['model_names'], colors), reverse=True)))

    if epoch == 0: 
        for m, model_name in enumerate(model_names):
            p1 = ax1.bar(epoch+1, val_acc[m], width=width, color=colors_sorted[m], label=model_name)
            p1 = ax1.bar(epoch+1+width, val_acc[m], width=width, color=colors_sorted[m], label=model_name)
    else:
        p1 = ax1.bar(epoch+1, val_acc, color=colors_sorted)

plt.ylim(20,100)
plt.xlim(0,num_epochs+1)
#ax1.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.xaxis.set_tick_params(which='minor', bottom=False)

ax2 = plt.subplot(1, 2, 2)
plt.xlabel("Epochs")
plt.ylabel("Loss")
for epoch in range(num_epochs):
    val_loss = [i for i in np.array(model_val_loss)[:,epoch]]
    val_loss, model_names, colors_sorted = (list(x) for x in zip(*sorted(zip(val_loss, params['model_names'], colors), reverse=True)))

    if epoch == 0: 
        for m, model_name in enumerate(model_names):
            p1 = ax2.bar(epoch+1, val_loss[m], width=width, color=colors_sorted[m], label=model_name)
    else:
        p1 = ax2.bar(epoch+1, val_loss, color=colors_sorted)


plt.xlim(0,num_epochs+1)
ax2.legend(title='Model type:', loc='right', prop={'size': 10})
plt.xticks(np.arange(1, num_epochs+1, 2.0))
ax2.yaxis.set_ticks_position('both')
ax2.minorticks_on()
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)


# View Predictions on Validation Data 

In [None]:
def process_image(image):
    ''' Scales, crops, and normalizes a PIL image for a PyTorch model,
        returns a Numpy array
    '''

    preprocess = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    image = preprocess(image)
    return image


In [None]:
def predict2(path, model, topk=9):
    ''' Predict the class (or classes) of an image using a trained deep learning model.
    '''
    img = Image.open(path)
    img = img.convert('RGB')
    img = process_image(img)
    
    # Convert 2D image to 1D vector
    img = np.expand_dims(img, 0)

    img = torch.from_numpy(img)
    
    model.eval()
    inputs = Variable(img).to(device)
    logits = model.forward(inputs)
    
    ps = F.softmax(logits,dim=1)
    topk = ps.cpu().topk(topk)
    
    return (e.data.numpy().squeeze().tolist() for e in topk)

In [None]:
def view_classify(im, prob, crystal_names):
    ''' Function for viewing an image and it's predicted classes.
    '''
    
    image = Image.open(im)
    fig, (ax1, ax2) = plt.subplots(figsize=(7, 10), ncols=1, nrows=2)
    
    ax1.set_title(crystal_names[0])
    ax1.imshow(image)
    ax1.axis('off')
    
    y_pos = np.arange(len(prob))
    ax2.barh(y_pos, prob, align='center')
    ax2.set_yticks(y_pos)
    ax2.set_yticklabels(crystal_names)
    ax2.tick_params(axis='y', rotation=45)
    ax2.invert_yaxis()  # labels read top-to-bottom
    ax2.set_title('Class Probability')
    plt.show()
    #current_time = time.strftime("%Y%m%d-%H%M%S")
    #fig.savefig('classify/'+current_time+'.png',bbox_inches='tight',pad_inches=.3)
    plt.close()

In [None]:
model = torch.load('../saved_models/vgg19_bs128_e20_13classes').cuda()
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
val_loader = torch.load('../saved_models/val_loader.pth')
for batch_idx, (imgs, labels, img_paths) in enumerate(val_loader):
    #predictions = model_ft(imgs)
    #preds = torch.max(predictions, 1).indices.tolist()    
    
    for im in img_paths:
        probs, classes = predict2(im, model.to(device))  
        crystal_names = [params['class_names'][e] for e in classes]
        view_classify(im, probs, crystal_names)
   

In [None]:
def save_image(data_dir, save_dir, crystal_names, im):
    image = Image.open(data_dir + im).convert("RGB")
    if crystal_names[0] == 'rimed aggs':
        crystal_names[0] = 'rimed_aggs'
    if crystal_names[0] == 'rimed columns':
        crystal_names[0] = 'rimed_columns'
    if crystal_names[0] == 'compact irregulars':
         crystal_names[0] = 'compact_irregulars'
    if not os.path.exists(save_dir+crystal_names[0]):
        os.makedirs(save_dir+crystal_names[0])
    image.save(save_dir+crystal_names[0]+'/'+im) 
    # cv2.imwrite(save_dir+crystal_names[0]+'/'+im, image)

# Predict on new data - Test Data

In [None]:
class TestDataSet(Dataset):
    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        all_imgs = os.listdir(main_dir)
        self.total_imgs = natsorted(all_imgs)

    def __len__(self):
        return len(self.total_imgs)

    def __getitem__(self, idx):
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        image = Image.open(img_loc)
        #print(image)
        #image =cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        tensor_image = self.transform(image)
        path = self.total_imgs[idx]
        return tensor_image, path

model = torch.load('../saved_models/bs128_e50_13classes_clean_vgg19').cuda()
model.eval()
campaign = 'ARM'
data_dir = '../cpi_data/campaigns/'+campaign+'/single_imgs/'
#save_dir = 'cpi_data/campaigns/'+campaign+'/'

#apply same transforms
test_transforms = transforms.Compose([transforms.Resize(224),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

testdata = TestDataSet(data_dir, transform=test_transforms)
test_loader = torch.utils.data.DataLoader(testdata, batch_size=100, shuffle=False, 
                               num_workers=20, drop_last=True)

In [None]:
class TestDataSet(Dataset):
    def __init__(self, open_dir, file_list):
        self.desired_size = 1000
        self.open_dir = open_dir
        self.transform = transforms.Compose([
            transforms.Resize((224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        
        self.all_paths = natsorted(file_list)

    def __len__(self):
        return len(self.all_paths)

    def __getitem__(self, idx):
        img_path = os.path.join(self.open_dir, self.all_paths[idx])
        #image = Image.open(img_path)
        
        #training images were resized to 1000x1000 initially
        image = cv2.cvtColor(cv2.imread(self.open_dir+self.all_paths[idx], cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.desired_size, self.desired_size), interpolation = cv2.INTER_AREA)

        image = Image.fromarray(image) #convert back to PIL for transforms
        image = image.convert('RGB')
        image = self.transform(image)

        path = self.all_paths[idx]
        return (image, path)
    
model = torch.load('../saved_models/bs128_e50_13classes_clean_vgg19').cuda()
model.eval()
campaign = 'MPACE'
df = pd.read_pickle('../final_databases/no_mask/df_good_ice_'+campaign+'.pkl')
data_dir = '../cpi_data/campaigns/'+campaign+'/single_imgs/'
testdata = TestDataSet(data_dir, df['filename'])
test_loader = torch.utils.data.DataLoader(testdata, batch_size=100, shuffle=False, 
                               num_workers=20)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (imgs, img_paths) in enumerate(test_loader):
    #predictions = model_ft(imgs)
    #preds = torch.max(predictions, 1).indices.tolist()   
    for im in img_paths:
        path = data_dir + im
        probs, classes = predict2(path, model.to(device))
        crystal_names = [params['class_names'][e] for e in classes]
        view_classify(path, probs, crystal_names)
        #save_image(data_dir, save_dir, crystal_names, im)

In [None]:
model.eval()
for batch_idx, (imgs, img_paths) in enumerate(test_loader):
    for im in img_paths:
        path = data_dir+im
        img_og = Image.open(path)
        img = img_og.convert('RGB')
        img = process_image(img)

        # Convert 2D image to 1D vector
        img = np.expand_dims(img, 0)

        img = torch.from_numpy(img)
        prediction = model(img)
        cpu_pred = prediction.cpu()
        result = cpu_pred.data.numpy()
        print(class_names[result.argmax()])
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.imshow(img_og)
        plt.show()
  