In [1]:
# Use GAN to generate synthesized characters on MNIST dataset

In [2]:
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image
from torchsummary import summary as PyTorchSummary

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [4]:
latent_size = 32
hidden_size1 = 256
hidden_size2 = 128
image_size = 28*28
num_epochs = 200
batch_size = 100
sample_dir = 'samples'

In [5]:
# Create a directory if not exists
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)

In [6]:
transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean = (0.5,0.5,0.5),
                                     std = (0.5, 0.5, 0.5))])

In [7]:
# MNIST dataset

In [8]:
mnist = torchvision.datasets.MNIST(root='./../data',
                                          train = True,
                                          transform = transform,
                                          download = True)

In [9]:
data_loader = torch.utils.data.DataLoader(dataset = mnist,
                                          batch_size = batch_size,
                                          shuffle = True)

## Build model

In [10]:
# Discriminator: a simple classification model

In [11]:
D = nn.Sequential(
    nn.Linear(image_size, hidden_size1),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size1, hidden_size2),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size2, 1),
    nn.Sigmoid())

In [12]:
PyTorchSummary(D, input_size = (1,image_size))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 256]         200,960
         LeakyReLU-2               [-1, 1, 256]               0
            Linear-3               [-1, 1, 128]          32,896
         LeakyReLU-4               [-1, 1, 128]               0
            Linear-5                 [-1, 1, 1]             129
           Sigmoid-6                 [-1, 1, 1]               0
Total params: 233,985
Trainable params: 233,985
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.89
Estimated Total Size (MB): 0.90
----------------------------------------------------------------


In [13]:
# Generator: like an autoencoder

In [14]:
G = nn.Sequential(
    nn.Linear(latent_size, hidden_size2),
    nn.ReLU(),
    nn.Linear(hidden_size2, hidden_size1),
    nn.ReLU(),
    nn.Linear(hidden_size1, image_size),
    nn.Tanh())

In [15]:
PyTorchSummary(G, input_size = (1, latent_size))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 128]           4,224
              ReLU-2               [-1, 1, 128]               0
            Linear-3               [-1, 1, 256]          33,024
              ReLU-4               [-1, 1, 256]               0
            Linear-5               [-1, 1, 784]         201,488
              Tanh-6               [-1, 1, 784]               0
Total params: 238,736
Trainable params: 238,736
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.91
Estimated Total Size (MB): 0.93
----------------------------------------------------------------


In [16]:
# Device setting
D = D.to(device)
G = G.to(device)


In [17]:
# Binary cross entropy loss and optimizer
criterion = nn.BCELoss()
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0004)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0004)

In [18]:
def denorm(x):
    out = (x+1)/2
    return out.clamp(0, 1)

In [19]:
def reset_grad():
    d_optimizer.zero_grad()
    g_optimizer.zero_grad()

## Training

In [20]:
total_step = len(data_loader)

In [21]:
for epoch in range(num_epochs):
    for i, (images,_) in enumerate(data_loader):
        images = images.reshape(batch_size,-1).to(device)
        
        #create labels with are later used as input for BCE loss
        real_labels = torch.ones(batch_size,1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)
        
        # ================================================================== #
        #                      Train the discriminator                       #
        # ================================================================== #
        
        #compute BCE_loss using real images where BCE_loss(x,y) = -y*log(D(x)) - (1-y)*log(1-D(x))
        #second term of the loss is always zero for real cases because y=1 for real case
        outputs = D(images)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs
        
        #compute BCEloss using fake images
        #First term of the loss is always zero for fake images
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs
        
        #backprop and optimize
        d_loss = 0.5*d_loss_real + 0.5*d_loss_fake
        reset_grad()
        d_loss.backward()
        d_optimizer.step()
        
        # ================================================================== #
        #                        Train the generator                         #
        # ================================================================== #

        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        
        #train G to maximize log(D(G(z))) instead of minimizing log(1-D(G(z)))
        #use real label to keep the first term so loss = -log(D(G(z)))
        g_loss = criterion(outputs, real_labels)
        
        #back_prop
        reset_grad()
        g_loss.backward()
        g_optimizer.step()
        
        if (i+1) % 200 == 0:
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}' 
                  .format(epoch, num_epochs, i+1, total_step, d_loss.item(), g_loss.item(), 
                          real_score.mean().item(), fake_score.mean().item()))
            
    # Save real images
    if (epoch+1) == 1:
        images = images.reshape(images.size(0), 1, 28, 28)
        save_image(denorm(images), os.path.join(sample_dir, 'real_images.png'))
        
    # Save sampled images
    fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28)
    save_image(denorm(fake_images), os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch+1)))
    

Epoch [0/200], Step [200/600], d_loss: 0.0181, g_loss: 4.0203, D(x): 1.00, D(G(z)): 0.03
Epoch [0/200], Step [400/600], d_loss: 0.0202, g_loss: 5.7997, D(x): 0.98, D(G(z)): 0.02
Epoch [0/200], Step [600/600], d_loss: 0.0321, g_loss: 6.7587, D(x): 0.99, D(G(z)): 0.05
Epoch [1/200], Step [200/600], d_loss: 0.1488, g_loss: 7.3343, D(x): 0.86, D(G(z)): 0.03
Epoch [1/200], Step [400/600], d_loss: 0.1207, g_loss: 6.7154, D(x): 0.94, D(G(z)): 0.05
Epoch [1/200], Step [600/600], d_loss: 0.0659, g_loss: 5.5521, D(x): 0.97, D(G(z)): 0.06
Epoch [2/200], Step [200/600], d_loss: 0.3451, g_loss: 1.8665, D(x): 0.85, D(G(z)): 0.33
Epoch [2/200], Step [400/600], d_loss: 0.5627, g_loss: 1.7490, D(x): 0.70, D(G(z)): 0.41
Epoch [2/200], Step [600/600], d_loss: 1.2951, g_loss: 0.8424, D(x): 0.47, D(G(z)): 0.58
Epoch [3/200], Step [200/600], d_loss: 0.0680, g_loss: 4.0013, D(x): 0.96, D(G(z)): 0.08
Epoch [3/200], Step [400/600], d_loss: 0.2877, g_loss: 1.9643, D(x): 0.77, D(G(z)): 0.19
Epoch [3/200], Step [

Epoch [30/200], Step [600/600], d_loss: 0.2175, g_loss: 4.0082, D(x): 0.83, D(G(z)): 0.07
Epoch [31/200], Step [200/600], d_loss: 0.2308, g_loss: 3.0485, D(x): 0.86, D(G(z)): 0.14
Epoch [31/200], Step [400/600], d_loss: 0.1789, g_loss: 3.6772, D(x): 0.91, D(G(z)): 0.16
Epoch [31/200], Step [600/600], d_loss: 0.1384, g_loss: 2.6540, D(x): 0.92, D(G(z)): 0.12
Epoch [32/200], Step [200/600], d_loss: 0.2619, g_loss: 3.6859, D(x): 0.78, D(G(z)): 0.08
Epoch [32/200], Step [400/600], d_loss: 0.1972, g_loss: 4.3199, D(x): 0.85, D(G(z)): 0.08
Epoch [32/200], Step [600/600], d_loss: 0.2250, g_loss: 3.7035, D(x): 0.85, D(G(z)): 0.12
Epoch [33/200], Step [200/600], d_loss: 0.2687, g_loss: 2.9234, D(x): 0.85, D(G(z)): 0.19
Epoch [33/200], Step [400/600], d_loss: 0.2413, g_loss: 3.1506, D(x): 0.88, D(G(z)): 0.18
Epoch [33/200], Step [600/600], d_loss: 0.2482, g_loss: 3.0968, D(x): 0.80, D(G(z)): 0.09
Epoch [34/200], Step [200/600], d_loss: 0.2058, g_loss: 2.4273, D(x): 0.89, D(G(z)): 0.18
Epoch [34/

Epoch [61/200], Step [400/600], d_loss: 0.3123, g_loss: 3.0580, D(x): 0.79, D(G(z)): 0.17
Epoch [61/200], Step [600/600], d_loss: 0.3444, g_loss: 2.7608, D(x): 0.89, D(G(z)): 0.28
Epoch [62/200], Step [200/600], d_loss: 0.2956, g_loss: 2.3603, D(x): 0.78, D(G(z)): 0.15
Epoch [62/200], Step [400/600], d_loss: 0.3298, g_loss: 2.1248, D(x): 0.79, D(G(z)): 0.20
Epoch [62/200], Step [600/600], d_loss: 0.3248, g_loss: 2.5872, D(x): 0.79, D(G(z)): 0.19
Epoch [63/200], Step [200/600], d_loss: 0.3487, g_loss: 2.2787, D(x): 0.74, D(G(z)): 0.16
Epoch [63/200], Step [400/600], d_loss: 0.4373, g_loss: 2.0705, D(x): 0.78, D(G(z)): 0.29
Epoch [63/200], Step [600/600], d_loss: 0.2891, g_loss: 1.9262, D(x): 0.84, D(G(z)): 0.23
Epoch [64/200], Step [200/600], d_loss: 0.4157, g_loss: 2.1564, D(x): 0.79, D(G(z)): 0.30
Epoch [64/200], Step [400/600], d_loss: 0.3048, g_loss: 2.1218, D(x): 0.79, D(G(z)): 0.17
Epoch [64/200], Step [600/600], d_loss: 0.3417, g_loss: 2.5691, D(x): 0.79, D(G(z)): 0.21
Epoch [65/

Epoch [92/200], Step [200/600], d_loss: 0.3367, g_loss: 2.5523, D(x): 0.75, D(G(z)): 0.19
Epoch [92/200], Step [400/600], d_loss: 0.3215, g_loss: 2.1063, D(x): 0.83, D(G(z)): 0.26
Epoch [92/200], Step [600/600], d_loss: 0.4772, g_loss: 1.5317, D(x): 0.73, D(G(z)): 0.33
Epoch [93/200], Step [200/600], d_loss: 0.4315, g_loss: 2.0930, D(x): 0.73, D(G(z)): 0.29
Epoch [93/200], Step [400/600], d_loss: 0.6277, g_loss: 2.1809, D(x): 0.62, D(G(z)): 0.29
Epoch [93/200], Step [600/600], d_loss: 0.4292, g_loss: 1.6621, D(x): 0.70, D(G(z)): 0.28
Epoch [94/200], Step [200/600], d_loss: 0.3994, g_loss: 1.8056, D(x): 0.70, D(G(z)): 0.24
Epoch [94/200], Step [400/600], d_loss: 0.4313, g_loss: 1.3818, D(x): 0.71, D(G(z)): 0.28
Epoch [94/200], Step [600/600], d_loss: 0.3466, g_loss: 1.7253, D(x): 0.77, D(G(z)): 0.24
Epoch [95/200], Step [200/600], d_loss: 0.4063, g_loss: 1.5930, D(x): 0.73, D(G(z)): 0.29
Epoch [95/200], Step [400/600], d_loss: 0.3398, g_loss: 2.1149, D(x): 0.73, D(G(z)): 0.20
Epoch [95/

Epoch [122/200], Step [400/600], d_loss: 0.5738, g_loss: 1.7050, D(x): 0.66, D(G(z)): 0.36
Epoch [122/200], Step [600/600], d_loss: 0.4240, g_loss: 1.4492, D(x): 0.73, D(G(z)): 0.29
Epoch [123/200], Step [200/600], d_loss: 0.5756, g_loss: 1.1473, D(x): 0.62, D(G(z)): 0.32
Epoch [123/200], Step [400/600], d_loss: 0.4398, g_loss: 1.7043, D(x): 0.64, D(G(z)): 0.25
Epoch [123/200], Step [600/600], d_loss: 0.5669, g_loss: 1.4648, D(x): 0.64, D(G(z)): 0.29
Epoch [124/200], Step [200/600], d_loss: 0.6374, g_loss: 1.2618, D(x): 0.66, D(G(z)): 0.41
Epoch [124/200], Step [400/600], d_loss: 0.5496, g_loss: 1.7916, D(x): 0.63, D(G(z)): 0.33
Epoch [124/200], Step [600/600], d_loss: 0.4780, g_loss: 1.3682, D(x): 0.68, D(G(z)): 0.31
Epoch [125/200], Step [200/600], d_loss: 0.4610, g_loss: 1.3132, D(x): 0.70, D(G(z)): 0.31
Epoch [125/200], Step [400/600], d_loss: 0.4968, g_loss: 1.3643, D(x): 0.73, D(G(z)): 0.36
Epoch [125/200], Step [600/600], d_loss: 0.5258, g_loss: 1.3148, D(x): 0.70, D(G(z)): 0.40

Epoch [152/200], Step [600/600], d_loss: 0.4516, g_loss: 1.4155, D(x): 0.72, D(G(z)): 0.34
Epoch [153/200], Step [200/600], d_loss: 0.4423, g_loss: 1.4427, D(x): 0.70, D(G(z)): 0.27
Epoch [153/200], Step [400/600], d_loss: 0.4397, g_loss: 1.3572, D(x): 0.68, D(G(z)): 0.28
Epoch [153/200], Step [600/600], d_loss: 0.4402, g_loss: 1.4326, D(x): 0.72, D(G(z)): 0.33
Epoch [154/200], Step [200/600], d_loss: 0.4605, g_loss: 1.2056, D(x): 0.69, D(G(z)): 0.30
Epoch [154/200], Step [400/600], d_loss: 0.4017, g_loss: 1.5262, D(x): 0.73, D(G(z)): 0.30
Epoch [154/200], Step [600/600], d_loss: 0.5510, g_loss: 1.8834, D(x): 0.62, D(G(z)): 0.27
Epoch [155/200], Step [200/600], d_loss: 0.4240, g_loss: 1.3936, D(x): 0.72, D(G(z)): 0.33
Epoch [155/200], Step [400/600], d_loss: 0.4567, g_loss: 1.4747, D(x): 0.66, D(G(z)): 0.28
Epoch [155/200], Step [600/600], d_loss: 0.5343, g_loss: 1.4299, D(x): 0.62, D(G(z)): 0.32
Epoch [156/200], Step [200/600], d_loss: 0.4860, g_loss: 1.5032, D(x): 0.66, D(G(z)): 0.29

Epoch [183/200], Step [200/600], d_loss: 0.4652, g_loss: 1.3914, D(x): 0.69, D(G(z)): 0.33
Epoch [183/200], Step [400/600], d_loss: 0.5492, g_loss: 1.2242, D(x): 0.66, D(G(z)): 0.37
Epoch [183/200], Step [600/600], d_loss: 0.5103, g_loss: 1.3002, D(x): 0.66, D(G(z)): 0.35
Epoch [184/200], Step [200/600], d_loss: 0.5412, g_loss: 1.9624, D(x): 0.63, D(G(z)): 0.32
Epoch [184/200], Step [400/600], d_loss: 0.4449, g_loss: 1.6722, D(x): 0.64, D(G(z)): 0.22
Epoch [184/200], Step [600/600], d_loss: 0.4141, g_loss: 1.2281, D(x): 0.72, D(G(z)): 0.31
Epoch [185/200], Step [200/600], d_loss: 0.5278, g_loss: 1.4074, D(x): 0.59, D(G(z)): 0.28
Epoch [185/200], Step [400/600], d_loss: 0.4768, g_loss: 1.5923, D(x): 0.73, D(G(z)): 0.35
Epoch [185/200], Step [600/600], d_loss: 0.4450, g_loss: 1.4309, D(x): 0.67, D(G(z)): 0.29
Epoch [186/200], Step [200/600], d_loss: 0.5267, g_loss: 1.2208, D(x): 0.65, D(G(z)): 0.34
Epoch [186/200], Step [400/600], d_loss: 0.4945, g_loss: 1.2879, D(x): 0.68, D(G(z)): 0.31

In [22]:

# Save the model checkpoints 
torch.save(G.state_dict(), 'G.ckpt')
torch.save(D.state_dict(), 'D.ckpt')