In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms,datasets
from torch import optim
import visdom

In [2]:
mnist_train = datasets.MNIST('./datasets/mnist_data',True,
    transform = transforms.Compose([
        transforms.ToTensor()
]),download = True)
mnist_train = DataLoader(mnist_train,batch_size = 32,shuffle = True)

mnist_test = datasets.MNIST('./datasets/mnist_data/',False,
    transform = transforms.Compose([
        transforms.ToTensor()
]),download = True)
mnist_test = DataLoader(mnist_test,batch_size = 32,shuffle = True)

In [3]:
class VAE(nn.Module):

    def __init__(self):
        super(VAE, self).__init__()

        # [b, 784] => [b, 20]
        # u: [b, 10]
        # sigma: [b, 10]
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU()
        )
        # [b, 20] => [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(10, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid()
        )

        self.criteon = nn.MSELoss()

    def forward(self, x):
        """

        :param x: [b, 1, 28, 28]
        :return:
        """
        batch_size = x.size(0)
        # flatten
        x = x.view(batch_size, 784)
        # encoder
        # [b, 20], including mean and sigma
        h_ = self.encoder(x)
        # [b, 20] => [b, 10] and [b, 10]
        mu, sigma = h_.chunk(2, dim=1)
        # reparametrize trick, epison~N(0, 1)
        h = mu + sigma * torch.randn_like(sigma)

        # decoder
        x_hat = self.decoder(h)
        # reshape
        x_hat = x_hat.view(batch_size, 1, 28, 28)

        kld = 0.5 * torch.sum(
            torch.pow(mu, 2) +
            torch.pow(sigma, 2) -
            torch.log(1e-8 + torch.pow(sigma, 2)) - 1
        ) / (batch_size * 28 * 28)

        return x_hat, kld

In [4]:
%%time

device = torch.device('cuda')
model = VAE().to(device)
criteon = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr = 1e-3)

viz = visdom.Visdom()

print(model)

for epoch in range(10):
    for batch_idx,(x,_) in enumerate(mnist_train):
        x = x.to(device)
        x_hat,_ = model(x)
        loss = criteon(x_hat,x)
        
        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'epoch : {epoch},loss : {loss.item()}')
    
    x,_ = iter(mnist_test).next()
    x = x.to(device)
    with torch.no_grad(): 
        x_hat,_ = model(x)
    viz.images(x,nrow = 8,win = 'x',opts = dict(title = 'x'))
    viz.images(x_hat,nrow = 8,win = 'x_hat',opts = dict(title = 'x_hat'))

Setting up a new session...


VAE(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=20, bias=True)
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=10, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=784, bias=True)
    (5): Sigmoid()
  )
  (criteon): MSELoss()
)
epoch : 0,loss : 0.028102461248636246
epoch : 1,loss : 0.023981500416994095
epoch : 2,loss : 0.022302860394120216
epoch : 3,loss : 0.018385138362646103
epoch : 4,loss : 0.0192782673984766
epoch : 5,loss : 0.015221218578517437
epoch : 6,loss : 0.01361935492604971
epoch : 7,loss : 0.015653513371944427
epoch : 8,loss : 0.019185714423656464
epoch : 9,loss : 0.016902988776564598
Wall time: 2min 13s
