In [1]:
# Star-import of the project module into the notebook namespace.
# NOTE(review): none of the names used in the cells below (os, torch, nn, optim,
# copy, np, transforms, loader, Nnet, StratifiedKFold, the samplers,
# classification_report) are imported anywhere else in the visible cells, so
# they presumably all arrive through this import — confirm against david_cnn.
from david_cnn import *
# Point the XDG cache directory at a location under /tmp.
# NOTE(review): presumably so that libraries which cache downloads write to a
# writable directory on the cluster — confirm.
os.environ['XDG_CACHE_HOME']='/tmp/xdg-cache'


In [2]:
def weights_init(m):
    """Initialise the parameters of a single module in place.

    Intended to be passed to ``nn.Module.apply``, which calls it once for
    every sub-module (containers included).

    * Modules whose class name contains ``'Conv'`` get Xavier-normal weights.
    * Modules whose class name contains ``'BatchNorm'`` get weights drawn from
      N(1.0, 0.02) and biases set to 0.
    * Every other module is left untouched.

    Modules that match by name but carry no ``weight``/``bias`` parameter
    (e.g. ``BatchNorm2d(affine=False)`` or a user-defined container such as
    ``ConvBlock``) are skipped instead of raising.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    # getattr with a default covers both "attribute missing" (containers) and
    # "attribute is None" (affine=False / bias=False).
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            nn.init.xavier_normal_(weight.data)
    elif 'BatchNorm' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias.data, 0)

In [3]:
# Select the compute device: CUDA when available, otherwise the CPU.
use_cuda = torch.cuda.is_available()

# `extras` holds optional DataLoader keyword arguments. It is always a dict so
# that `DataLoader(..., **extras)` works on both code paths (it used to be
# `False` on the CPU path, which cannot be unpacked with `**`).
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    extras = {}
    print("CUDA NOT supported")

# Build the network on the selected device and apply the custom initialisation
# to every sub-module.
net = Nnet().to(computing_device)
net.apply(weights_init)

# Print the model
print(net)

# Loss criteria are defined in the torch.nn package
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all network parameters
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Snapshot the freshly initialised network and optimizer so that every
# cross-validation fold can restart from exactly the same state. deepcopy is
# required because state_dict() returns references to the live tensors.
init_state = copy.deepcopy(net.state_dict())
init_state_opt = copy.deepcopy(optimizer.state_dict())

CUDA is supported
Nnet(
  (main): Sequential(
    (0): Conv2d(3, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU(inplace=True)
    (2): Conv2d(18, 30, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(30, 35, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (6): Conv2d(35, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (7): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(40, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): Conv2d(45, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (12): BatchNorm2d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [4]:
# Image preprocessing: resize, centre-crop to 224x224, convert to a tensor.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])
dataset = loader('train.csv', '/datasets/cs154-fa19-public/', transform=transform)

# Run configuration
batch_size = 64
validation_split = .2
shuffle_dataset = True
random_seed = 42

# Path (X) and label (y) columns of the dataset's frame; the cross-validation
# splitter stratifies on y.
dataframe = dataset.frame
X = dataframe['path']
y = dataframe['label']

# NOTE(review): X and y are taken before this shuffle and only `dataframe` is
# rebound, so within this cell the shuffle changes neither X, y nor `dataset`.
# The seeding still affects numpy's global RNG state.
if shuffle_dataset:
    np.random.seed(random_seed)
    dataframe = dataframe.sample(frac=1).reset_index(drop=True)

In [None]:
# Stratified K-fold cross-validation.
#
# For every fold the network and optimizer are reset to their initial snapshot,
# trained on the training split with class-balanced resampling, and validated
# on the held-out split after every epoch. The best checkpoint of each fold
# (by validation accuracy) is kept in `models` / `optims`, and its accuracy in
# `folds_acc`.
kfold = 1
folds_acc = []       # best validation accuracy of each fold
models = []          # best network state_dict of each fold
optims = []          # optimizer state_dict saved together with each best model
early_stop_num = 5   # patience: consecutive non-improving epochs before stopping
num_epochs = 10      # maximum number of epochs per fold
N = 50               # report the running loss every N minibatches
skf = StratifiedKFold(n_splits=2, random_state=None, shuffle=False)

for train_index, test_index in skf.split(X, y):
    best_model = None
    best_optim = None
    # Early-stopping state lives at fold scope: resetting it every epoch would
    # make every epoch look like an improvement.
    best_accuracy = 0
    es_count = 0
    print('kfold:', kfold)

    # Restart from the freshly initialised network / optimizer
    net.load_state_dict(init_state)
    optimizer.load_state_dict(init_state_opt)

    # Inverse-frequency sample weights for the training split. skf.split yields
    # positional indices, hence .iloc. np.unique's inverse mapping makes this
    # independent of the actual label values (they need not be 1..K).
    y_train = y.iloc[train_index].values
    _, label_pos, class_labels_count = np.unique(
        y_train, return_inverse=True, return_counts=True)
    sample_weights = (1. / class_labels_count)[label_pos]

    val_indices = test_index

    # The weighted sampler emits positions in [0, len(sample_weights)), i.e.
    # positions *within the training split*. Wrapping the dataset in a Subset
    # maps them back to the real dataset rows, so no validation sample is ever
    # drawn for training.
    # NOTE(review): this assumes row i of dataset.frame is dataset[i], as the
    # validation sampler below already does — confirm against `loader`.
    train_subset = torch.utils.data.Subset(dataset, train_index.tolist())
    # replacement=True is what makes the weights rebalance the classes; drawing
    # all samples without replacement is merely a permutation.
    train_sampler = WeightedRandomSampler(sample_weights,
                                          num_samples=len(sample_weights),
                                          replacement=True)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)

    # Track the loss across training
    total_loss = []
    avg_minibatch_loss = []

    for epoch in range(num_epochs):
        print('epoch:', epoch)
        N_minibatch_loss = 0.0

        # ---- training ----
        net.train()  # BatchNorm uses batch statistics and updates running stats
        for minibatch_count, (images, labels) in enumerate(train_loader, 0):
            # Zero out the stored gradient (buffer) from the previous iteration
            optimizer.zero_grad()
            # Move the minibatch to the compute device
            images, labels = images.to(computing_device), labels.to(computing_device)
            # Forward pass and loss
            outputs = net(images)
            loss = criterion(outputs, labels)
            # Backpropagate and update the weights
            loss.backward()
            optimizer.step()

            # Store plain floats, not tensors, so nothing keeps a reference to
            # device memory or the autograd graph.
            loss_value = loss.item()
            total_loss.append(loss_value)
            N_minibatch_loss += loss_value

            if minibatch_count % N == N - 1:
                # Print the loss averaged over the last N mini-batches
                N_minibatch_loss /= N
                print('Epoch %d, average minibatch %d loss: %.3f' % (epoch + 1, minibatch_count+1, N_minibatch_loss))
                # Add the averaged loss over N minibatches and reset the counter
                avg_minibatch_loss.append(N_minibatch_loss)
                N_minibatch_loss = 0.0
        print("Finished", epoch + 1, "epochs of training")

        # ---- validation ----
        correct = 0
        total = 0
        net.eval()  # BatchNorm must use its running statistics here
        with torch.no_grad():
            for images, labels in validation_loader:
                images, labels = images.to(computing_device), labels.to(computing_device)
                # One batched forward pass; in eval mode this is equivalent to
                # classifying the samples one by one.
                pred_y = torch.argmax(net(images), dim=1)
                correct += (pred_y == labels).sum().item()
                total += labels.size(0)
        accuracy = correct / total if total else 0.0
        print('Iteration accuracy: %.3f' % ((accuracy)*100))

        # ---- early stopping ----
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            es_count = 0
            best_model = copy.deepcopy(net.state_dict())
            best_optim = copy.deepcopy(optimizer.state_dict())
        else:
            es_count += 1
            if es_count == early_stop_num:
                print('breaking')
                break

    # Best model for kth fold
    models.append(best_model)
    optims.append(best_optim)
    print('Best accuracy: %.3f' % ((best_accuracy)*100))
    folds_acc.append(best_accuracy)
    kfold += 1

kfold: 1
epoch: 0
Epoch 1, average minibatch 50 loss: 4.586
Epoch 1, average minibatch 100 loss: 4.043
Epoch 1, average minibatch 150 loss: 3.618


In [None]:
# Evaluate the best cross-validation checkpoint on the test set.

# Pick the fold whose best checkpoint reached the highest validation accuracy
best_idx = np.argmax(folds_acc)
best_model = models[best_idx]
best_optim = optims[best_idx]
# Loading
net.load_state_dict(best_model)
optimizer.load_state_dict(best_optim)

# Using whole dataset as test set
transform = transforms.Compose([transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()])
testset = loader('test.csv','/datasets/cs154-fa19-public/',transform=transform)
testset_size = len(testset)
indices = list(range(testset_size))
valid_sampler = SubsetRandomSampler(indices)
validation_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                    sampler=valid_sampler)

y_vals = []    # ground-truth labels, in loader order
y_preds = []   # predicted labels, aligned with y_vals
# Reset the counters explicitly; otherwise they would continue from whatever
# the last validation pass of the training cell left behind.
correct = 0
total = 0
net.eval()  # BatchNorm must use its running statistics during inference
with torch.no_grad():
    for images, labels in validation_loader:
        # Move the minibatch to the compute device
        images, labels = images.to(computing_device), labels.to(computing_device)
        # One batched forward pass per minibatch
        pred_y = torch.argmax(net(images), dim=1)
        y_vals.extend(labels.tolist())
        y_preds.extend(pred_y.tolist())
        correct += (pred_y == labels).sum().item()
        total += labels.size(0)
accuracy = correct / total if total else 0.0

print('Test accuracy: %.3f' % (accuracy * 100))
print(classification_report(y_vals, y_preds, labels=range(201)))
#print(confusion_matrix(y_vals, y_preds, labels=range(201)))