# PYTORCH COMMON MISTAKES - How To Save Time 🕒

- https://www.youtube.com/watch?v=O2wJ3tkc-TU&list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz&index=14

## 1. Train on a single batch to check if the model works or not
    - x_train, y_train = next(iter(train_loader))

## 2. Forgetting to toggle model.eval() / model.train() when testing the model
    - Mainly matters for dropout and other layers (e.g. batch norm) that behave differently during training and evaluation
    
## 3. Forgetting .zero_grad() in optimizer

## 4. Using softmax while using CrossEntropyLoss
    - CrossEntropyLoss already applies (log-)softmax followed by the negative log-likelihood, so there is no point in adding softmax to the output layer
## 5. Bias term with batchNorm2d  [check]
    - set bias=False in the conv layer (the BatchNorm2d that follows has its own learnable shift, so the conv bias is redundant)
    
## 6. Using view as a substitute for permute
    - permute is a more general version of transpose; view only reshapes and does not reorder the axes
    
## 7. Incorrect Data augmentation
## 8. Not shuffling the Data
    - but not for time series data
## 9. Not Normalizing Data [check]
    - transforms.Normalize(mean=(x,), std=(y,))
    
## 10. Not clipping gradients [check]
    - mostly in RNNs, GRUs, LSTMs
    - to mitigate exploding gradients
    - after loss.backward() and before optimizer.step()
        - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
torch.__version__

'1.7.1'

In [2]:
# Name of the device to run on: "cuda" when a usable GPU is present, else "cpu".
use_cuda = torch.cuda.is_available()
if use_cuda:
    available_device = "cuda"
else:
    available_device = "cpu"
available_device
 

'cuda'

In [3]:
!pwd

/home/felladog/Desktop/ML/pytorch_tutorials/time_saving_in_pytorch


## Create a fully connected network

In [33]:
# Open question: what does the super() call in __init__ do?
# Open question: why is x reassigned at each step of forward()?

class NN(nn.Module):
    """Fully connected classifier with one hidden layer followed by dropout.

    Args:
        input_size: Number of features in each (flattened) input sample.
        num_classes: Number of output scores, one per class.
        hidden_size: Width of the hidden layer. Defaults to 50.
        dropout_p: Probability handed to ``nn.Dropout``. Defaults to 0.5.
    """

    def __init__(
        self,
        input_size: int,
        num_classes: int,
        hidden_size: int = 50,
        dropout_p: float = 0.5,
    ) -> None:
        super().__init__()
        # Attribute order is kept (fc1, fc2, dropout) so the module's printed
        # representation lists the layers in the same order as before.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores for a batch of inputs.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``. No softmax is
            applied here; the scores are returned exactly as ``fc2`` emits them.
        """
        hidden = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(hidden)


In [4]:
# Simple usage example: push one random batch through a freshly built network.
model = NN(input_size=784, num_classes=10)
x = torch.rand(128, 784)
# Calling the instance (model(x)) goes through nn.Module.__call__, which in
# turn invokes forward(); forward() is not called directly.
y = model(x)
print(y.shape)

torch.Size([128, 10])


## Setting the device

In [34]:
# Resolve the compute device once: CUDA when it is available, CPU otherwise.
if torch.cuda.is_available():
    available_device = "cuda"
else:
    available_device = "cpu"
print(available_device)

# torch.device object that the model and every batch are moved onto.
device = torch.device(available_device)
device

cuda


device(type='cuda')

## Hyperparameters

In [35]:
# Settings shared by the data loaders, the model and the optimizer.
in_size = 28 * 28  # input features per sample: one flattened 28x28 image
c_size = 10        # number of classes the model scores
batch_size = 64    # samples per batch requested from the data loaders
l_r = 1e-3         # learning rate handed to the optimizer

print("Parameter initialized")

Parameter initialized


## Data Loaders

In [36]:
# Both MNIST splits share the same storage folder, download flag and
# ToTensor transform; only the `train` flag differs between them.
mnist_kwargs = {
    "root": "../pytorch_dataset/",
    "download": True,
    "transform": transforms.ToTensor(),
}
train_set = datasets.MNIST(train=True, **mnist_kwargs)
test_set = datasets.MNIST(train=False, **mnist_kwargs)

print("Dataset downloaded")

Dataset downloaded


In [37]:
# Batch iterators over the two splits; both use the same batch size and both
# are shuffled.
loader_kwargs = {"batch_size": batch_size, "shuffle": True}
train_loader = DataLoader(train_set, **loader_kwargs)
test_loader = DataLoader(test_set, **loader_kwargs)
print("Data loaded")

Data loaded


## Initialize the model

In [38]:
# Build the classifier from the configured sizes, then move it to `device`.
model = NN(in_size, c_size)
model = model.to(device)
print("initialized the model")

initialized the model


## Define the loss function

In [41]:
# Loss applied to the model's raw output scores (the network's forward pass
# applies no softmax of its own).
criterion = nn.CrossEntropyLoss()

## Optimizer

In [40]:
# Adam over every parameter of `model`, using the learning rate `l_r`.
optimizer = optim.Adam(model.parameters(), lr=l_r)

# Training the Network

In [34]:
# Number of batches the training loader yields per epoch.
# NOTE(review): the recorded result (938) is the next whole number above
# len(train_set) / 64 = 937.5, which suggests the final, smaller batch is
# kept rather than dropped -- confirm against the DataLoader defaults.
len(train_loader)

938

In [40]:
# Training samples divided by the batch size (a fractional batch count).
# Uses `batch_size` instead of a literal so the figure always matches the
# value the data loaders were built with.
len(train_set) / batch_size

937.5

In [20]:
# Number of full passes over `train_loader` made by the training loop.
num_epochs = 2

## 1. Train on a single batch to check if the model works or not

In [16]:
# Pull exactly one (inputs, labels) batch from the training loader, so the
# model can be checked on a single batch before running a full training pass.
x_train, y_train = next(iter(train_loader))

In [42]:

# Dropout must be active while training; set the mode explicitly so the loop
# does not depend on whichever mode an earlier cell left the model in.
model.train()

for epoch in range(num_epochs):
    print(epoch)
    for batch_idx, (x_train, y_train) in enumerate(train_loader):
        # Each batch arrives as images of shape [batch, 1, 28, 28] and
        # labels of shape [batch].
        x_train = x_train.to(device=device)
        y_train = y_train.to(device=device)
        # Flatten every image into one feature vector for the linear layers.
        x_train = x_train.reshape(x_train.shape[0], -1)

        # Forward pass: compute the scores and the loss for this batch.
        y_pred = model(x_train)
        loss = criterion(y_pred, y_train)

        # Zero previous gradients.
        # Before the backward pass, use the optimizer object to zero all of the
        # gradients for the variables it will update (which are the learnable
        # weights of the model). This is because by default, gradients are
        # accumulated in buffers (i.e. not overwritten) whenever .backward()
        # is called. Doing it here, rather than after step(), also discards any
        # gradients left behind by code that ran before this loop started.
        optimizer.zero_grad()

        # Backward pass: compute gradient of the loss with respect to model
        # parameters.
        loss.backward()

        # Calling the step function on an Optimizer makes an update to its
        # parameters.
        optimizer.step()
    # `loss` here is the loss of the last batch of the epoch, not an average.
    print(f" loss at the end of {epoch} epoch is {loss}")

print("Done Training")


0
 loss at the end of 0 epoch is 0.5224602818489075
1
 loss at the end of 1 epoch is 0.4177483916282654
Done Training


# Measuring the accuracy of the model

In [43]:
# Test-set accuracy measured WITHOUT switching the model to evaluation mode:
# the model.eval() / model.train() calls are left commented out.
# NOTE(review): this looks deliberate, to show the effect of forgetting
# model.eval() on a network that contains dropout -- confirm before enabling.
correct = 0
total = 0
# model.eval()
with torch.no_grad():  # no gradients are needed for scoring
    for x_test, y_test in test_loader:
        x_test = x_test.to(device)
        y_test = y_test.to(device)
        # One flat feature vector per image.
        x_test = x_test.reshape(len(x_test), -1)

        preds = model(x_test)

        # The position of the largest score in each row is the predicted class.
        pred_idx = preds.max(dim=1).indices
        matches = pred_idx == y_test
        correct += matches.sum().item()
        total += len(y_test)

print(f"Out of {total} images {correct} where correctly classified")
fraction_correct = correct / total
acc = fraction_correct * 100
print(f"Accuracy of the model is {acc:.2f}")
# model.train()

Out of 10000 images 8891 where correctly classified
Accuracy of the model is 88.91


In [44]:
# Test-set accuracy with the model switched to evaluation mode for the
# duration of the measurement, then switched back to training mode.
correct = 0
total = 0
model.eval()
with torch.no_grad():  # no gradients are needed for scoring
    for x_test, y_test in test_loader:
        x_test = x_test.to(device)
        y_test = y_test.to(device)
        # One flat feature vector per image.
        x_test = x_test.reshape(len(x_test), -1)

        preds = model(x_test)

        # The position of the largest score in each row is the predicted class.
        pred_idx = preds.max(dim=1).indices
        matches = pred_idx == y_test
        correct += matches.sum().item()
        total += len(y_test)

print(f"Out of {total} images {correct} where correctly classified")
fraction_correct = correct / total
acc = fraction_correct * 100
print(f"Accuracy of the model is {acc:.2f}")
# Back to training mode; as the last expression of the cell this also echoes
# the model.
model.train()

Out of 10000 images 9371 where correctly classified
Accuracy of the model is 93.71


NN(
  (fc1): Linear(in_features=784, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [60]:
# Shape of the index half of preds.max(1), i.e. the predicted class per row
# of the most recent `preds` batch.
preds.max(1)[1].shape

torch.Size([64])

In [63]:
# Inspect the predicted class indices left over from the last evaluated batch.
pred_idx

tensor([4, 6, 4, 9, 0, 6, 0, 4, 6, 6, 2, 6, 6, 4, 4, 4, 6, 0, 0, 1, 6, 0, 0, 0,
        6, 6, 1, 0, 6, 4, 0, 0, 6, 0, 6, 0, 0, 0, 0, 0, 0, 6, 0, 6, 0, 0, 0, 6,
        6, 0, 4, 1, 6, 0, 6, 6, 0, 6, 0, 6, 6, 6, 6, 6], device='cuda:0')

In [71]:
# Number of labels in the last evaluated batch (the amount added to `total`).
y_test.size(0)

64

In [70]:
# Element-wise mask marking where the prediction matches the label, followed
# by the number of matches as a plain Python integer.
true = (pred_idx == y_test)
print(true)
int(true.sum())

tensor([False, False, False, False, False, False, False, False, False, False,
        False,  True,  True, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False,  True, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,  True,
        False,  True,  True, False], device='cuda:0')


6

## TODO

- [ ] Document about the Neural Network