In [1]:
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

import os
from glob import glob
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
seed = 1

lr = 0.001
momentum = 0.5

batch_size = 64
test_batch_size = 64

epochs = 5

no_cuda = False
log_interval = 100

In [3]:
use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
device

device(type='cuda')

## Model

In [15]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)
        
    def forward(self, x):
        # feature extraction
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)        
        #print(x.shape)
        
        # fully-connected
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

## Preprocessing

In [5]:
import os

In [6]:
os.listdir('dataset/mnist_png/')

['training', 'testing']

In [7]:
train_dir = 'dataset/mnist_png/training'
test_dir = 'dataset/mnist_png/testing'

In [8]:
torch.manual_seed(seed)

<torch._C.Generator at 0x7ff82811b6d0>

In [9]:
train_dataset = datasets.ImageFolder(root=train_dir, 
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=(0.5,), std=(0.5,))
                                    ]))

test_dataset = datasets.ImageFolder(root=test_dir, 
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=(0.5,), std=(0.5,))
                                    ]))

In [10]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                           batch_size=batch_size)

## Optimization

In [17]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

## Learning Rate Scheduler

In [23]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [24]:
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=0, verbose=True)

## Training

In [12]:
from torch.utils.tensorboard import SummaryWriter

In [25]:
writer = SummaryWriter()

In [26]:
for epoch in range(1, epochs+1):
    # Train mode
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} {}/{} ({:.0f}%)\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100 * batch_idx / len(train_loader), 
                loss.item()))

    # Evaluation mode
    model.eval()

    test_loss = 0
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()    
            pred = output.argmax(dim=1, keepdim=True)    
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    
    accuracy = 100 * correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {} / {} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))
    
    # Learning Rate Scheduler
    scheduler.step(accuracy, epoch)
    
    # For, tensorboard
    #if epoch == 0:
    grid = torchvision.utils.make_grid(data)
    writer.add_image('images', grid, epoch)
    writer.add_graph(model, data)
        
    writer.add_scalar('Loss/train', loss, epoch)
    writer.add_scalar('Loss/test', test_loss, epoch)
    writer.add_scalar('Accuracy/test', accuracy, epoch)

Train Epoch: 1 0/60000 (0%)	Loss: 0.053497
Train Epoch: 1 6400/60000 (11%)	Loss: 0.029345
Train Epoch: 1 12800/60000 (21%)	Loss: 0.089192
Train Epoch: 1 19200/60000 (32%)	Loss: 0.015356
Train Epoch: 1 25600/60000 (43%)	Loss: 0.069069
Train Epoch: 1 32000/60000 (53%)	Loss: 0.027784
Train Epoch: 1 38400/60000 (64%)	Loss: 0.152186
Train Epoch: 1 44800/60000 (75%)	Loss: 0.039388
Train Epoch: 1 51200/60000 (85%)	Loss: 0.019282
Train Epoch: 1 57600/60000 (96%)	Loss: 0.112572

Test set: Average loss: 0.0528, Accuracy: 9839 / 10000 (98%)
Train Epoch: 2 0/60000 (0%)	Loss: 0.036821




Train Epoch: 2 6400/60000 (11%)	Loss: 0.072331
Train Epoch: 2 12800/60000 (21%)	Loss: 0.042753
Train Epoch: 2 19200/60000 (32%)	Loss: 0.100839
Train Epoch: 2 25600/60000 (43%)	Loss: 0.144416
Train Epoch: 2 32000/60000 (53%)	Loss: 0.059611
Train Epoch: 2 38400/60000 (64%)	Loss: 0.009376
Train Epoch: 2 44800/60000 (75%)	Loss: 0.030996
Train Epoch: 2 51200/60000 (85%)	Loss: 0.013838
Train Epoch: 2 57600/60000 (96%)	Loss: 0.034159

Test set: Average loss: 0.0493, Accuracy: 9848 / 10000 (98%)
Train Epoch: 3 0/60000 (0%)	Loss: 0.037205
Train Epoch: 3 6400/60000 (11%)	Loss: 0.051210
Train Epoch: 3 12800/60000 (21%)	Loss: 0.009233
Train Epoch: 3 19200/60000 (32%)	Loss: 0.062373
Train Epoch: 3 25600/60000 (43%)	Loss: 0.064611
Train Epoch: 3 32000/60000 (53%)	Loss: 0.091679
Train Epoch: 3 38400/60000 (64%)	Loss: 0.014891
Train Epoch: 3 44800/60000 (75%)	Loss: 0.058871
Train Epoch: 3 51200/60000 (85%)	Loss: 0.027299
Train Epoch: 3 57600/60000 (96%)	Loss: 0.134729

Test set: Average loss: 0.0493, 

## Save Model

In [46]:
save_path = 'my_model_weight.pt'

In [47]:
torch.save(model.state_dict(), save_path)

In [48]:
model.state_dict().keys()

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])

In [49]:
model.state_dict()['conv1.weight']

tensor([[[[-0.0777, -0.0884, -0.0409, -0.0320,  0.0401],
          [ 0.0228, -0.0562, -0.0643,  0.0323,  0.0460],
          [-0.0794, -0.1039,  0.0469,  0.1321, -0.0547],
          [-0.0731,  0.0013, -0.0385,  0.0588,  0.0364],
          [ 0.0613,  0.1415,  0.0725, -0.1009, -0.0853]],

         [[-0.1049, -0.1703, -0.0823,  0.0129,  0.0003],
          [-0.1068, -0.0632,  0.0139,  0.0517,  0.1582],
          [-0.0061, -0.1221, -0.0458,  0.1592, -0.0167],
          [ 0.0314,  0.1156,  0.1580,  0.1787,  0.0602],
          [-0.0497,  0.1080,  0.1445, -0.0205, -0.0081]],

         [[-0.0195, -0.1453,  0.0263, -0.0247,  0.0658],
          [-0.1404, -0.1313, -0.1317,  0.0947,  0.1507],
          [ 0.0252, -0.0008, -0.0283,  0.2006,  0.0048],
          [-0.0963, -0.0037,  0.0167,  0.0662, -0.0479],
          [ 0.0352,  0.0725, -0.0022, -0.0058,  0.0005]]],


        [[[ 0.0958, -0.0685,  0.0597, -0.0613,  0.1163],
          [-0.0223, -0.0744,  0.0795, -0.0611, -0.0403],
          [ 0.1178,  0.

In [50]:
model2 = Net().to(device)

In [51]:
model2.state_dict()['conv1.weight']

tensor([[[[-0.1135, -0.0841, -0.0618, -0.0286,  0.0589],
          [-0.0281, -0.0581, -0.0325,  0.0876, -0.0956],
          [ 0.0584,  0.1144, -0.0692,  0.0882,  0.0176],
          [ 0.1056,  0.0666, -0.0670,  0.0387,  0.0095],
          [-0.0619,  0.0742,  0.0759, -0.0128, -0.1146]],

         [[-0.0517,  0.0984,  0.0434,  0.1053,  0.1001],
          [ 0.0597,  0.0788,  0.0389,  0.0787,  0.1095],
          [-0.0334, -0.0673,  0.0075,  0.0088, -0.1114],
          [ 0.0961, -0.0445,  0.0075,  0.1136, -0.0954],
          [-0.0382, -0.1032,  0.0225,  0.0420, -0.1057]],

         [[-0.0025, -0.0916,  0.0132,  0.0496,  0.0266],
          [ 0.0352, -0.0018, -0.0100, -0.0764, -0.0236],
          [ 0.0248, -0.0513,  0.1036,  0.0026, -0.0171],
          [ 0.0844, -0.1128,  0.0915, -0.1056,  0.0737],
          [ 0.1063,  0.0167, -0.0524,  0.0320,  0.0738]]],


        [[[-0.0934,  0.0124,  0.0541, -0.0218, -0.0284],
          [-0.0468, -0.0915, -0.0528, -0.0878,  0.0004],
          [-0.0632, -0.

In [52]:
weight_dict = torch.load(save_path)

In [53]:
weight_dict.keys()

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])

In [54]:
weight_dict['conv1.weight']

tensor([[[[-0.0777, -0.0884, -0.0409, -0.0320,  0.0401],
          [ 0.0228, -0.0562, -0.0643,  0.0323,  0.0460],
          [-0.0794, -0.1039,  0.0469,  0.1321, -0.0547],
          [-0.0731,  0.0013, -0.0385,  0.0588,  0.0364],
          [ 0.0613,  0.1415,  0.0725, -0.1009, -0.0853]],

         [[-0.1049, -0.1703, -0.0823,  0.0129,  0.0003],
          [-0.1068, -0.0632,  0.0139,  0.0517,  0.1582],
          [-0.0061, -0.1221, -0.0458,  0.1592, -0.0167],
          [ 0.0314,  0.1156,  0.1580,  0.1787,  0.0602],
          [-0.0497,  0.1080,  0.1445, -0.0205, -0.0081]],

         [[-0.0195, -0.1453,  0.0263, -0.0247,  0.0658],
          [-0.1404, -0.1313, -0.1317,  0.0947,  0.1507],
          [ 0.0252, -0.0008, -0.0283,  0.2006,  0.0048],
          [-0.0963, -0.0037,  0.0167,  0.0662, -0.0479],
          [ 0.0352,  0.0725, -0.0022, -0.0058,  0.0005]]],


        [[[ 0.0958, -0.0685,  0.0597, -0.0613,  0.1163],
          [-0.0223, -0.0744,  0.0795, -0.0611, -0.0403],
          [ 0.1178,  0.

In [55]:
model2.load_state_dict(weight_dict)

<All keys matched successfully>

In [56]:
model2.eval()

Net(
  (conv1): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)

## Save Entire Model

In [57]:
save_model_path = 'my_model.pt'

In [58]:
torch.save(model, save_model_path)

In [59]:
model3 = torch.load(save_model_path)

In [60]:
model3.state_dict()['conv1.weight']

tensor([[[[-0.0777, -0.0884, -0.0409, -0.0320,  0.0401],
          [ 0.0228, -0.0562, -0.0643,  0.0323,  0.0460],
          [-0.0794, -0.1039,  0.0469,  0.1321, -0.0547],
          [-0.0731,  0.0013, -0.0385,  0.0588,  0.0364],
          [ 0.0613,  0.1415,  0.0725, -0.1009, -0.0853]],

         [[-0.1049, -0.1703, -0.0823,  0.0129,  0.0003],
          [-0.1068, -0.0632,  0.0139,  0.0517,  0.1582],
          [-0.0061, -0.1221, -0.0458,  0.1592, -0.0167],
          [ 0.0314,  0.1156,  0.1580,  0.1787,  0.0602],
          [-0.0497,  0.1080,  0.1445, -0.0205, -0.0081]],

         [[-0.0195, -0.1453,  0.0263, -0.0247,  0.0658],
          [-0.1404, -0.1313, -0.1317,  0.0947,  0.1507],
          [ 0.0252, -0.0008, -0.0283,  0.2006,  0.0048],
          [-0.0963, -0.0037,  0.0167,  0.0662, -0.0479],
          [ 0.0352,  0.0725, -0.0022, -0.0058,  0.0005]]],


        [[[ 0.0958, -0.0685,  0.0597, -0.0613,  0.1163],
          [-0.0223, -0.0744,  0.0795, -0.0611, -0.0403],
          [ 0.1178,  0.

## Save, Load and Resuming Training

In [61]:
checkpoint_path = 'checkpoint.pt'

In [62]:
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}, checkpoint_path)

In [63]:
model4 = Net().to(device)

In [64]:
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [65]:
checkpoint = torch.load(checkpoint_path)

In [66]:
checkpoint.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

In [67]:
model4.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

In [68]:
model4

Net(
  (conv1): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)

In [69]:
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.0001
    momentum: 0.5
    nesterov: False
    weight_decay: 0
)

In [70]:
epoch

5

In [71]:
loss

tensor(0.0974, device='cuda:0', requires_grad=True)