In [420]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision import datasets, transforms as T
from torchvision.utils import make_grid

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [421]:
transform = T.Compose([
    T.ToTensor(),
    T.ColorJitter(brightness=(0.5, 1.2), contrast=(0.95, 1.2), saturation=(0.5, 1.5)),
    T.RandomAffine(60, translate=(0.1, 0.1), scale=(0.8, 2), shear=(0.5, 1.5)),
])
dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, 64, True)

In [422]:
data, labels = next(iter(dataloader))
grid = make_grid(data)

In [541]:
class MyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(1, 10, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(10, 20, 3),
            nn.ReLU(),
        )

        self.max_pool = nn.MaxPool2d(2)

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Dropout1d(0.2),
            nn.Linear(500, 250),
            nn.BatchNorm1d(250),
            nn.Mish(),

            nn.Dropout(0.2),
            nn.Linear(250, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),

            nn.Linear(100, 10),
        )
        
    def forward(self, x):
        x = self.conv_layer(x)
        x = self.max_pool(x)
        x = self.fc(x)
        
        return x


model = MyNet()

In [542]:
opt = optim.Adam(model.parameters(), 1e-1)
criterion = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.5, threshold=1e-4, patience=10)

In [543]:
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 26, 26]             100
              ReLU-2           [-1, 10, 26, 26]               0
         MaxPool2d-3           [-1, 10, 13, 13]               0
            Conv2d-4           [-1, 20, 11, 11]           1,820
              ReLU-5           [-1, 20, 11, 11]               0
         MaxPool2d-6             [-1, 20, 5, 5]               0
           Flatten-7                  [-1, 500]               0
         Dropout1d-8                  [-1, 500]               0
            Linear-9                  [-1, 250]         125,250
      BatchNorm1d-10                  [-1, 250]             500
             Mish-11                  [-1, 250]               0
          Dropout-12                  [-1, 250]               0
           Linear-13                  [-1, 100]          25,100
      BatchNorm1d-14                  [

In [544]:
epochs = 2

model.train()
for epoch in range(epochs):
    i = 0
    for x, y in dataloader:
        _label = model(x)
        loss = criterion(_label, y)

        opt.zero_grad()
        loss.backward()
        opt.step()

        if i % 20 == 0:
            print(f'EPOCH: {epoch}, Loss: {loss}, LR: {opt.param_groups[0]['lr']}')
            scheduler.step(loss)
        i += 1

EPOCH: 0, Loss: 2.388152837753296, LR: 0.1
EPOCH: 0, Loss: 2.2946152687072754, LR: 0.1
EPOCH: 0, Loss: 1.9909926652908325, LR: 0.1
EPOCH: 0, Loss: 1.8857176303863525, LR: 0.1
EPOCH: 0, Loss: 2.0614843368530273, LR: 0.1
EPOCH: 0, Loss: 1.9380584955215454, LR: 0.1
EPOCH: 0, Loss: 2.1154191493988037, LR: 0.1
EPOCH: 0, Loss: 1.8970551490783691, LR: 0.1
EPOCH: 0, Loss: 1.6628354787826538, LR: 0.1
EPOCH: 0, Loss: 1.5386840105056763, LR: 0.1
EPOCH: 0, Loss: 1.6684986352920532, LR: 0.1
EPOCH: 0, Loss: 1.8041095733642578, LR: 0.1
EPOCH: 0, Loss: 1.6704274415969849, LR: 0.1
EPOCH: 0, Loss: 1.3778164386749268, LR: 0.1
EPOCH: 0, Loss: 1.3806499242782593, LR: 0.1
EPOCH: 0, Loss: 1.6808650493621826, LR: 0.1
EPOCH: 0, Loss: 1.3844329118728638, LR: 0.1
EPOCH: 0, Loss: 1.2042601108551025, LR: 0.1
EPOCH: 0, Loss: 0.9488086104393005, LR: 0.1
EPOCH: 0, Loss: 1.4118154048919678, LR: 0.1
EPOCH: 0, Loss: 1.1782509088516235, LR: 0.1
EPOCH: 0, Loss: 1.1767133474349976, LR: 0.1
EPOCH: 0, Loss: 1.159417271614074

In [540]:
x, y = next(iter(dataloader))
img, label = x[0], y[0]

model.eval()
_label = model(img[None])

(tensor([[4.3908]], grad_fn=<AddmmBackward0>), tensor(3))

In [516]:
conv_layer = nn.Sequential(*list(model.children())[0][:4])

loss = (criterion(_label.squeeze(0), label))
loss.backward()

In [517]:
model

tensor(0.0176, grad_fn=<NllLossBackward0>)