In [1]:
import os
import time

import imageio
import numpy as np
import pandas as pd
import pylab as plt
import torch

In [2]:
# Build the dataset folder path from its components so it resolves on any OS
# (hard-coded backslashes only work on Windows).
path = os.path.join("..", "data", "clean_images_index")

In [3]:
def path_label_loader(path):
    """
    Collect the path of every ``.jpg`` image found in the subfolders of
    ``path`` together with one integer label per image.

    The label of an image is the integer formed by joining all decimal digit
    characters in the name of the subfolder that contains it
    (e.g. ``"class_12"`` -> 12).

    Entries of ``path`` that are not directories, or whose name contains no
    digit, are skipped instead of raising. Folders and files are visited in
    sorted name order so the result is reproducible across platforms.

    Args:
     path: Folder path containing subfolders of images
    Output:
     images: List of image paths
     labels: Numpy integer array of labels, aligned with ``images``
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sort for determinism
    for s_folder in sorted(os.listdir(path)):
        img_folder = os.path.join(path, s_folder)
        # isdecimal() only accepts characters that int() is able to parse
        digits = ''.join(ch for ch in s_folder if ch.isdecimal())

        # Stray files and helper folders (e.g. ".ipynb_checkpoints") are not classes
        if not digits or not os.path.isdir(img_folder):
            continue

        label = int(digits)

        for img in sorted(os.listdir(img_folder)):
            if img.endswith(".jpg"):
                images.append(os.path.join(img_folder, img))
                labels.append(label)

    # Explicit dtype keeps the array integer-typed even when no image was found
    return images, np.array(labels, dtype=int)

In [4]:
# Gather every image path and its folder-derived label from the dataset folder
images, labels = path_label_loader(path)

In [5]:
# Sanity check: number of image paths, then number of labels, one per line
print(len(images), len(labels), sep="\n")

2061
2061


In [6]:
# Distinct label values and how many images carry each of them
np.unique(labels, return_counts=True)

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13]),
 array([156, 152, 156, 170, 150, 129, 178, 141, 185, 159, 152, 161, 172],
       dtype=int64))

In [7]:
def eval_on_test_set(test_data, test_label, net=None, mean=None, std=None, device=None, verbose=1):
    """
    Compute the error of a model on the whole test set in one forward pass

    Args:
     test_data: Torch tensor of test inputs; a singleton axis is inserted at dim 1 before the forward pass
     test_label: Torch tensor of test labels, handed to utils.get_error
     net: Model to evaluate; when None the notebook-level `net` is used
     mean: Value subtracted from the inputs; when None the notebook-level `mean` is used
     std: Value the centred inputs are divided by; when None the notebook-level `std` is used
     device: Optional device the test data and labels are moved to before evaluating
     verbose: Print out the error rate if 1, default 1

    Output:
     total_error: Python number, `.item()` of what utils.get_error returns for the full test set
    """

    def _from_notebook(value, name):
        # Explicit arguments win; otherwise use the notebook-level variable of that name
        if value is not None:
            return value
        if name not in globals():
            raise NameError("'" + name + "' was neither passed to eval_on_test_set nor defined at notebook level")
        return globals()[name]

    net = _from_notebook(net, 'net')
    mean = _from_notebook(mean, 'mean')
    std = _from_notebook(std, 'std')

    # Bind the result to a new name: re-assigning an argument is easy to misread
    batch = test_data.unsqueeze(dim=1)

    if device is not None:
        batch = batch.to(device)
        test_label = test_label.to(device)

    # Same normalisation as the one applied to the training minibatches
    inputs = (batch - mean)/std

    # NOTE(review): the model is evaluated in whatever mode it is currently in;
    # call net.eval() beforehand if it contains dropout / batch-norm layers.
    with torch.no_grad():   # no gradients are needed for evaluation
        scores = net( inputs )

    error = utils.get_error( scores , test_label)
    total_error = error.item()

    if verbose == 1:
        print( 'error rate on test set =', total_error*100 ,'percent\n')
    return total_error

In [8]:
def train_model(net, n_epoch, bs, lr, train_data, train_label, verbose=1):
    """
    Train a given model with specified hyperparameters

    Besides its arguments the function reads these notebook-level names:
    `device`, `criterion`, `utils`, `test_data`, `test_label` and `eval_on_test_set`.

    Args:
     net: NN model to be trained
     n_epoch: Number of epochs to train the model
     bs: Batch size for minibatch GD
     lr: Initial learning rate of GD; divided by 1.5 on every 5th epoch
     train_data: Torch tensor of dim [N:rgb:width:height]
                 NOTE(review): a singleton axis is inserted at dim 1 of every
                 minibatch before the forward pass - confirm this matches the layout above
     train_label: Torch tensor of dim [N]
     verbose: Print out metrics during training if 1, default 1

    Output:
     net: Trained NN model
     records: Dictionary containing metrics history:
              'train_loss_mb' running mean of the loss after each minibatch,
              'train_loss' / 'train_error' mean over the minibatches of each epoch,
              'test_error' result of eval_on_test_set after each epoch
    """

    N = train_data.shape[0]

    net = net.to(device)
    # Normalisation statistics come from the training set only
    mean = train_data.mean().to(device)
    std = train_data.std().to(device)

    train_loss_hist_mb = []
    train_loss_hist = []
    train_error_hist = []
    test_error_hist = []

    start=time.time()

    for epoch in range(1,n_epoch+1):

        # Learning-rate decay: divide by 1.5 every 5th epoch
        if not epoch%5:
            lr = lr / 1.5

        # Re-created every epoch so that the decayed learning rate takes effect
        optimizer=torch.optim.SGD( net.parameters() , lr=lr )

        running_loss=0
        running_error=0
        num_batches=0

        shuffled_indices=torch.randperm(N)

        for count in range(0,N,bs):

            # FORWARD AND BACKWARD PASS

            optimizer.zero_grad()

            indices=shuffled_indices[count:count+bs]
            minibatch_data =  train_data[indices].unsqueeze(dim=1)
            minibatch_label=  train_label[indices]

            minibatch_data=minibatch_data.to(device)
            minibatch_label=minibatch_label.to(device)

            # Normalise with the training-set statistics
            inputs = (minibatch_data - mean)/std

            # NOTE(review): gradients w.r.t. the inputs are not used below
            inputs.requires_grad_()

            scores=net( inputs )

            loss =  criterion( scores , minibatch_label)

            loss.backward()

            optimizer.step()


            # COMPUTE STATS

            running_loss += loss.detach().item()

            error = utils.get_error( scores.detach() , minibatch_label)
            running_error += error.item()

            num_batches+=1

            # Loss history must track the loss (the error was recorded here before)
            train_loss_hist_mb.append(running_loss/num_batches)


        # AVERAGE STATS THEN DISPLAY
        total_loss = running_loss/num_batches
        total_error = running_error/num_batches
        elapsed = (time.time()-start)/60

        if verbose == 1:
            print('epoch=',epoch, '\t time=', elapsed,'min', '\t lr=', lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')

        # mean / std are local to this function, so they are handed over explicitly
        test_error = eval_on_test_set(test_data, test_label, net=net, mean=mean, std=std,
                                      device=device, verbose=verbose)

        train_loss_hist.append(total_loss)
        train_error_hist.append(total_error)
        test_error_hist.append(test_error)

    records = {'train_loss_mb': train_loss_hist_mb,
              'train_loss': train_loss_hist,
              'train_error': train_error_hist,
              'test_error': test_error_hist}

    return net, records