In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from tensorboardX import SummaryWriter
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline

In [3]:
# Training hyperparameters; every value a reader might want to tune lives here.
batch_size = 32        # samples per mini-batch for both the train and test loaders
learning_rate = 1e-3   # step size handed to the Adam optimizer
num_epochs = 20        # full passes over the training set

# Seed torch's global RNG so a Restart & Run All repeats the same random
# draws (weight initialisation and shuffling both draw from it).
seed = 42
torch.manual_seed(seed)

In [4]:
# MNIST is cached under ./datas. download=True fetches it only when the files
# are missing, so the notebook also runs on a machine that has never seen the data.
data_root = './datas'
to_tensor = transforms.ToTensor()

train_dataset = datasets.MNIST(data_root, train=True, transform=to_tensor, download=True)
test_dataset = datasets.MNIST(data_root, train=False, transform=to_tensor, download=True)

# Shuffle only the training split; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
class Cnn(nn.Module):
    """Small CNN classifier with dense-style concatenations and deconv previews.

    The first two conv stages concatenate their input with their output along
    the channel axis before feeding the next stage. Besides the class logits,
    ``forward`` returns one single-channel "deconvolution" image per conv stage,
    produced with freshly sampled random 3x3 kernels; these are meant for
    visualisation only and carry no gradient.

    Args:
        in_dim: number of channels of the input image.
        out_dim: number of output classes (size of the logits vector).

    Note:
        The fully connected head expects 16 * 5 * 5 = 400 features, which is
        what the conv stack produces for 28x28 inputs.
    """

    def __init__(self, in_dim, out_dim):
        super(Cnn, self).__init__()

        # Channel counts after each concatenation performed in forward().
        # With in_dim == 1 these are 16 and 64, the values previously hard-coded.
        cat1_dim = in_dim + 15
        cat2_dim = cat1_dim + 48

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_dim, 15, 3, stride=1, padding=1),  # b 15 28 28
            nn.ReLU(True),
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(cat1_dim, 48, 3, stride=1, padding=1),  # b 48 28 28
            nn.ReLU(True),
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(cat2_dim, 32, 3, stride=1, padding=1),  # b 32 28 28
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),  # b 32 14 14
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(32, 16, 3, stride=1, padding=1),  # b 16 14 14
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),  # b 16 7 7
        )

        self.conv5 = nn.Sequential(
            nn.Conv2d(16, 16, 3, stride=1),  # b 16 5 5 (no padding)
            nn.ReLU(True),
        )

        self.fc = nn.Sequential(
            nn.Linear(400, 200),
            nn.Linear(200, 100),
            nn.Linear(100, out_dim),
        )

    @staticmethod
    def _random_deconv(feat):
        """Project a feature map to one channel with a random 3x3 transposed conv.

        The kernel is created on the same device and with the same dtype as
        ``feat`` so the model works wherever it has been moved. The input is
        detached because the result is only used for visualisation; there is
        no reason to keep an autograd graph alive for it.
        """
        feat = feat.detach()
        weight = torch.randn(feat.size(1), 1, 3, 3, device=feat.device, dtype=feat.dtype)
        return F.conv_transpose2d(feat, weight)

    def forward(self, x):
        """Run the network.

        Args:
            x: input batch shaped (batch, in_dim, H, W); the linear head
                requires H = W = 28.

        Returns:
            A 6-tuple ``(logits, deconv1, deconv2, deconv3, deconv4, deconv5)``
            where ``logits`` is (batch, out_dim) and each ``deconvN`` is a
            single-channel image derived from the N-th stage's feature map.
        """
        out1 = self.conv1(x)
        out1 = torch.cat((x, out1), dim=1)       # input + conv1 features
        out2 = self.conv2(out1)
        out2 = torch.cat((out1, out2), dim=1)    # everything so far + conv2 features
        out3 = self.conv3(out2)
        out4 = self.conv4(out3)
        out5 = self.conv5(out4)

        deconv1 = self._random_deconv(out1)
        deconv2 = self._random_deconv(out2)
        deconv3 = self._random_deconv(out3)
        deconv4 = self._random_deconv(out4)
        deconv5 = self._random_deconv(out5)

        flat = out5.view(out5.size(0), -1)
        return self.fc(flat), deconv1, deconv2, deconv3, deconv4, deconv5
    

In [6]:
# Use the GPU when one is present, otherwise fall back to the CPU instead of
# crashing on an unconditional .cuda() call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1 input channel, 10 output classes.
model = Cnn(1, 10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
# Location handed to SummaryWriter for this run's TensorBoard logs.
log_dir = './log/cnn4'
writer = SummaryWriter(log_dir)

In [8]:
# Train for num_epochs epochs, logging to TensorBoard and evaluating on the
# test split after every epoch.
#
# Batches are moved to whatever device the model's parameters live on, so this
# cell does not care whether the model was placed on a GPU or the CPU.
# print(...) is called with a single argument throughout, which behaves the
# same on Python 2 and Python 3.
run_device = next(model.parameters()).device

for epoch in range(num_epochs):

    running_loss = 0.0   # sum of per-sample losses seen so far this epoch
    running_acc = 0.0    # number of correct predictions so far this epoch
    total_step = 0       # number of samples seen so far this epoch
    for i, (img, label) in enumerate(train_loader, 1):
        img = img.to(run_device)
        label = label.to(run_device)

        outputs = model(img)
        out, deconvs = outputs[0], outputs[1:]
        loss = criterion(out, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_n = img.size(0)
        _, pred = torch.max(out, 1)
        correct = (pred == label).sum().item()

        # loss is a batch mean, so weight it by the batch size before summing.
        running_loss += loss.item() * batch_n
        running_acc += correct
        total_step += batch_n

        # Global step: i starts at 1, so steps run 1..num_epochs * len(train_loader).
        step = epoch * len(train_loader) + i

        writer.add_scalar('loss', loss.item(), step)
        # Log a plain Python float rather than a (possibly CUDA) tensor.
        writer.add_scalar('accuracy', correct / float(batch_n), step)
        # NOTE(review): this writes an image grid on every step, which makes the
        # event file large; consider moving it under the `i % 100` branch below.
        writer.add_image('images', torchvision.utils.make_grid(img), step)

        if i % 100 == 0:
            # Tags are 'deconv1' .. 'deconv5', one per conv stage.
            for idx, deconv in enumerate(deconvs, 1):
                grid = torchvision.utils.make_grid(deconv, normalize=True, scale_each=True)
                writer.add_image('deconv{}'.format(idx), grid.detach().cpu(), step)

            for tag, value in model.named_parameters():
                if tag.startswith('deconv'):
                    continue
                tag = tag.replace('.', '/')
                writer.add_histogram(tag, value.detach().cpu().numpy(), step)
                # A parameter that took no part in the loss has grad None.
                if value.grad is not None:
                    writer.add_histogram(tag + '/grad', value.grad.detach().cpu().numpy(), step)

        if i % 500 == 0:
            print('Epoch: [{}/{}], Loss: {:.6f}, Acc: {:.6f}'.format(
                epoch + 1, num_epochs,
                running_loss / total_step,
                running_acc / total_step))

    print('Finish {} Epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
        epoch + 1,
        running_loss / len(train_dataset),
        running_acc / len(train_dataset)))

    model.eval()
    eval_loss = 0.0
    eval_acc = 0.0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for img, label in test_loader:
            img = img.to(run_device)
            label = label.to(run_device)

            out = model(img)[0]   # only the logits matter here
            loss = criterion(out, label)

            eval_loss += loss.item() * img.size(0)

            _, pred = torch.max(out, 1)
            eval_acc += (pred == label).sum().item()

    print('Eval Loss: {:.6f}, Eval Acc: {:.6f}'.format(
        eval_loss / len(test_dataset), eval_acc / len(test_dataset)))

    model.train()

Epoch: [1/20], Loss: 0.345186, Acc: 0.888938
Epoch: [1/20], Loss: 0.227549, Acc: 0.928219
Epoch: [1/20], Loss: 0.181652, Acc: 0.942937
Finish 1 Epoch, Loss: 0.159202, Acc: 0.950233
Eval Loss: 0.050366, Eval Acc: 0.983900
Epoch: [2/20], Loss: 0.060580, Acc: 0.980437
Epoch: [2/20], Loss: 0.059711, Acc: 0.981437
Epoch: [2/20], Loss: 0.058604, Acc: 0.981667
Finish 2 Epoch, Loss: 0.057556, Acc: 0.982067
Eval Loss: 0.051072, Eval Acc: 0.984800
Epoch: [3/20], Loss: 0.043387, Acc: 0.986688
Epoch: [3/20], Loss: 0.044034, Acc: 0.986563
Epoch: [3/20], Loss: 0.045098, Acc: 0.986125
Finish 3 Epoch, Loss: 0.044909, Acc: 0.986267
Eval Loss: 0.038065, Eval Acc: 0.986300
Epoch: [4/20], Loss: 0.039516, Acc: 0.987625
Epoch: [4/20], Loss: 0.039395, Acc: 0.987531
Epoch: [4/20], Loss: 0.038906, Acc: 0.987875
Finish 4 Epoch, Loss: 0.038790, Acc: 0.988233
Eval Loss: 0.043282, Eval Acc: 0.984700
Epoch: [5/20], Loss: 0.030160, Acc: 0.990437
Epoch: [5/20], Loss: 0.032330, Acc: 0.990125
Epoch: [5/20], Loss: 0.032

In [9]:
# Training is finished: close the SummaryWriter.
# NOTE(review): presumably this flushes any pending events to disk — confirm in the tensorboardX docs.
writer.close()

In [10]:
# Persist only the learned parameters (state_dict), not the whole module object.
checkpoint_path = './ser/cnn4.pth'

# torch.save does not create missing directories, so make sure the target
# folder exists first (isdir/makedirs works on both Python 2 and 3).
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

torch.save(model.state_dict(), checkpoint_path)