In [11]:
import torch
from torch import nn
import pandas as pd

In [26]:
#Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)[0,1)
X=torch.rand((2,8))

In [27]:
def dropout(X,p):
    if p==1:
        return torch.zeros(X.shape).float()
    elif p==0:
        return X
    else:
        mask=(torch.rand(X.shape)>p).float()
        X=mask*X/(1.0-p)
        return X

In [4]:
X = torch.arange(16, dtype=torch.float32).reshape((2, 8))

In [28]:
dropout(X,0.5)

tensor([[0.9722, 0.0000, 0.0000, 0.0000, 1.1911, 0.7410, 0.2447, 0.0000],
        [0.0000, 0.1765, 1.9159, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])

In [29]:
X

tensor([[0.4861, 0.7717, 0.8177, 0.2550, 0.5955, 0.3705, 0.1224, 0.6916],
        [0.3495, 0.0882, 0.9579, 0.8867, 0.2703, 0.6762, 0.8970, 0.8395]])

In [71]:
net=nn.Sequential(nn.Flatten(),
                 nn.Linear(784,256),
                 nn.ReLU(),
                 nn.Dropout(0.2),
                 nn.Linear(256,10))
def init_weights(layer):
    if type(layer)==nn.Linear:
        nn.init.normal_(layer.weight,std=0.1)
net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=256, out_features=10, bias=True)
)

In [72]:
optimizer=torch.optim.SGD(net.parameters(),lr=0.01)
loss=nn.CrossEntropyLoss()
def accuracy(y_hat,y):
#     print(y_hat.shape,y.shape)
#     assert y_hat.shape==y.shape
    y_hat=y_hat.argmax(1) #by row

    correct=y_hat.type(y.dtype)==y
    return float(correct.type(y.dtype).sum())

In [73]:

# import torchvision
# train_loader = torch.utils.data.DataLoader(
#   torchvision.datasets.MNIST('/files/', train=True, download=True,
#                              transform=torchvision.transforms.Compose([
#                                torchvision.transforms.ToTensor(),
#                                torchvision.transforms.Normalize(
#                                  (0.1307,), (0.3081,))
#                              ])),
#   batch_size=256, shuffle=True)
# test_loader = torch.utils.data.DataLoader(
#   torchvision.datasets.MNIST('/files/', train=False, download=True,
#                              transform=torchvision.transforms.Compose([
#                                torchvision.transforms.ToTensor(),
#                                torchvision.transforms.Normalize(
#                                  (0.1307,), (0.3081,))
#                              ])),
#   batch_size=256, shuffle=True)

In [74]:
def create_data():
    train_loader = torch.utils.data.DataLoader(
                          torchvision.datasets.MNIST('/files/', train=True, download=True,
                                                     transform=torchvision.transforms.Compose([
                                                       torchvision.transforms.ToTensor(),
                                                       torchvision.transforms.Normalize(
                                                         (0.1307,), (0.3081,))
                                                     ])),
                                      batch_size=256, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
                          torchvision.datasets.MNIST('/files/', train=False, download=True,
                                                     transform=torchvision.transforms.Compose([
                                                       torchvision.transforms.ToTensor(),
                                                       torchvision.transforms.Normalize(
                                                         (0.1307,), (0.3081,))
                                                     ])),
                          batch_size=256, shuffle=True)
        
    return train_loader ,test_loader

In [75]:
def training(epochs,net,loss,acc,optim,train_loader):
    net.train()
    for i in range(epochs):
        total_loss,total_acc,data_size=0,0,0
        for X,y in train_loader:
            data_size+=len(y)
            y_hat=net(X)
            l=loss(y_hat,y)
            total_loss+=l
            total_acc+=acc(y_hat,y)
            optim.zero_grad()
            l.backward()
            optim.step()
        print('Epoch-',i,' Loss-',total_loss/data_size,' Acc-',total_acc/data_size)
        

In [76]:
train_loader,test_loader=create_data()

In [77]:
training(5,net,loss,accuracy,optimizer,train_loader)

Epoch- 0  Loss- tensor(0.0044, grad_fn=<DivBackward0>)  Acc- 0.68665
Epoch- 1  Loss- tensor(0.0021, grad_fn=<DivBackward0>)  Acc- 0.8321666666666667
Epoch- 2  Loss- tensor(0.0017, grad_fn=<DivBackward0>)  Acc- 0.8651833333333333
Epoch- 3  Loss- tensor(0.0015, grad_fn=<DivBackward0>)  Acc- 0.8813666666666666
Epoch- 4  Loss- tensor(0.0014, grad_fn=<DivBackward0>)  Acc- 0.8900666666666667


In [52]:
net[1].weight

Parameter containing:
tensor([[ 0.1315,  0.1927, -0.1013,  ...,  0.0380,  0.0527,  0.0513],
        [ 0.0249,  0.0594, -0.0098,  ...,  0.3307,  0.0299,  0.0261],
        [ 0.0354,  0.0494,  0.0046,  ..., -0.0247,  0.1026, -0.0394],
        ...,
        [-0.0520, -0.1753,  0.0335,  ...,  0.0927,  0.1847, -0.0168],
        [-0.0211,  0.0732,  0.0101,  ...,  0.0420, -0.0950, -0.0056],
        [-0.1460, -0.0339,  0.0132,  ...,  0.0821,  0.0428,  0.1787]],
       requires_grad=True)

In [53]:
net[4].weight

Parameter containing:
tensor([[-0.0045, -0.0567,  0.0135,  ...,  0.1550,  0.0364,  0.0462],
        [ 0.0719, -0.0557, -0.0207,  ...,  0.0418, -0.0518,  0.1198],
        [ 0.0187, -0.0457,  0.0773,  ..., -0.1753,  0.0487, -0.0368],
        ...,
        [-0.0063,  0.0088, -0.2056,  ...,  0.0691,  0.0834,  0.1116],
        [ 0.0356,  0.0272,  0.1315,  ...,  0.0069, -0.0144,  0.1113],
        [ 0.1483,  0.1382,  0.1100,  ..., -0.2360, -0.0533,  0.0563]],
       requires_grad=True)

In [79]:
net[4].weight

Parameter containing:
tensor([[-0.2365, -0.0975,  0.0548,  ..., -0.1893, -0.0471,  0.0926],
        [-0.1272,  0.0732,  0.0634,  ...,  0.0177,  0.1581, -0.1287],
        [ 0.1519,  0.0245,  0.0419,  ..., -0.0447, -0.0708,  0.0498],
        ...,
        [-0.0633, -0.0823, -0.0161,  ..., -0.0881, -0.0169,  0.1521],
        [ 0.0968,  0.0308,  0.0501,  ..., -0.1382, -0.0164,  0.0424],
        [-0.0717, -0.0325, -0.1317,  ..., -0.0694,  0.0725,  0.0357]],
       requires_grad=True)

# testing Mode

In [80]:
net.eval()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=256, out_features=10, bias=True)
)

In [84]:
total_loss,total_acc,data_size=0,0,0
for X,y in test_loader:
    data_size+=len(y)
    y_hat=net(X)
    l=loss(y_hat,y)
    total_loss+=l
    total_acc+=accuracy(y_hat,y)
            
print( 'Loss-',total_loss/data_size,' Acc-',total_acc/data_size)

Loss- tensor(0.0010, grad_fn=<DivBackward0>)  Acc- 0.9217
