# Lab-09-2 Weight initialization

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import random

In [2]:
mnist_train = dsets.MNIST(root="MNIST_data/", train=True, 
    transform=transforms.ToTensor(), download=False)
mnist_test = dsets.MNIST(root="MNIST_data/", train=False, 
    transform=transforms.ToTensor(), download=False)
data_loader = torch.utils.data.DataLoader(mnist_train,
                                          batch_size=64,
                                          shuffle=True,
                                          drop_last=True
                                         )

In [3]:
learning_rate = 1e-3
n_epochs = 15
n_batches = len(data_loader)

## Xavier Initialization

In [5]:
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0020,  0.0501,  0.0260,  ...,  0.0965,  0.0749,  0.1139],
        [ 0.1014, -0.0256, -0.0242,  ...,  0.1049,  0.1398,  0.1210],
        [-0.0333, -0.0181,  0.0635,  ...,  0.0672,  0.1252,  0.1234],
        ...,
        [-0.0101,  0.0077, -0.1017,  ..., -0.0731,  0.0356, -0.0274],
        [-0.0438, -0.0434, -0.1463,  ...,  0.0982,  0.0260, -0.0800],
        [ 0.0196, -0.0416,  0.0125,  ...,  0.0007, -0.0368,  0.1416]],
       requires_grad=True)

In [6]:
model = torch.nn.Sequential(linear1, relu, 
                            linear2, relu, 
                            linear3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
for epoch in range(n_epochs):
    avg_loss = 0
    for X, Y in data_loader:
        X = X.view(-1, 784)
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, Y)
        loss.backward()
        optimizer.step()
        avg_loss += loss / n_batches
    
    print("Epoch: {} Cost: {}".format(epoch+1, avg_loss))

Epoch: 1 Cost: 0.21691398322582245
Epoch: 2 Cost: 0.08538837730884552
Epoch: 3 Cost: 0.05708051472902298
Epoch: 4 Cost: 0.04004073888063431
Epoch: 5 Cost: 0.03275870531797409
Epoch: 6 Cost: 0.026575565338134766
Epoch: 7 Cost: 0.02208746410906315
Epoch: 8 Cost: 0.020598605275154114
Epoch: 9 Cost: 0.014723679050803185
Epoch: 10 Cost: 0.015600338578224182
Epoch: 11 Cost: 0.014910999685525894
Epoch: 12 Cost: 0.01259804517030716
Epoch: 13 Cost: 0.009844214655458927
Epoch: 14 Cost: 0.010587536729872227
Epoch: 15 Cost: 0.012311047874391079


In [8]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 784).float()
    Y_test = mnist_test.test_labels
    
    pred = model(X_test)
    correct_pred = torch.argmax(pred, 1) == Y_test
    accuracy = correct_pred.float().mean()
    print("Accuracy: {}".format(accuracy.item()))

Accuracy: 0.9805999994277954




# Lab-09-3 Dropout

In [13]:
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.5)

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.0552,  0.0067, -0.1462,  ..., -0.0670, -0.1451, -0.0183],
        [-0.1340, -0.1407,  0.1153,  ..., -0.0196, -0.0144, -0.0006],
        [-0.1484, -0.0331,  0.0070,  ..., -0.0917, -0.0694,  0.0797],
        ...,
        [ 0.0899, -0.0299, -0.0047,  ..., -0.1087,  0.0392,  0.1307],
        [-0.1126, -0.1444, -0.0798,  ..., -0.1301, -0.1408,  0.0884],
        [ 0.1244,  0.0938,  0.1329,  ...,  0.0330, -0.0098, -0.1262]],
       requires_grad=True)

In [14]:
model = torch.nn.Sequential(linear1, relu, dropout, 
                            linear2, relu, dropout, 
                            linear3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
model.train()
for epoch in range(n_epochs):
    avg_loss = 0
    for X, Y in data_loader:
        X = X.view(-1, 784)
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, Y)
        loss.backward()
        optimizer.step()
        avg_loss += loss / n_batches
    
    print("Epoch: {} Cost: {}".format(epoch+1, avg_loss))

Epoch: 1 Cost: 0.4149174690246582
Epoch: 2 Cost: 0.20577938854694366
Epoch: 3 Cost: 0.16741079092025757
Epoch: 4 Cost: 0.14535097777843475
Epoch: 5 Cost: 0.13062024116516113
Epoch: 6 Cost: 0.12106368690729141
Epoch: 7 Cost: 0.11250791698694229
Epoch: 8 Cost: 0.10650661587715149
Epoch: 9 Cost: 0.09896581619977951
Epoch: 10 Cost: 0.09910891950130463
Epoch: 11 Cost: 0.0920320600271225
Epoch: 12 Cost: 0.09067228436470032
Epoch: 13 Cost: 0.08724194765090942
Epoch: 14 Cost: 0.08798177540302277
Epoch: 15 Cost: 0.08296672999858856


In [16]:
with torch.no_grad():
    model.eval()
    X_test = mnist_test.test_data.view(-1, 784).float()
    Y_test = mnist_test.test_labels
    
    pred = model(X_test)
    correct_pred = torch.argmax(pred, 1) == Y_test
    accuracy = correct_pred.float().mean()
    print("Accuracy: {}".format(accuracy.item()))

Accuracy: 0.9764000177383423
