In [1]:
import torch
import torchvision
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
from mnist_model import mnist
import os

if not os.path.exists('./vae_img'):
    os.mkdir('./vae_img')
mnist_path = './mnist.pth'

def to_img(x):
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x


num_epochs = 100
batch_size = 128
learning_rate = 1e-3

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])

dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)


class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(784, 400)
        self.fc21 = nn.Linear(400, 20)
        self.fc22 = nn.Linear(400, 20)
        self.fc3 = nn.Linear(20, 400)
        self.fc4 = nn.Linear(400, 784)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.tanh(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

model = VAE()

reconstruction_function = nn.MSELoss(reduction='sum')


def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # mse loss
    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    return BCE + KLD


optimizer = optim.Adam(model.parameters(), lr=1e-3)
mnist = mnist(pretrained=True)

In [2]:
for epoch in range(num_epochs):
    model.train()
    mnist.eval()
    eval_acc = 0.
    for data in dataloader:
        img, label = data
        img = img.view(img.size(0), -1)
        img = Variable(img)
        # ===================forward=====================
        optimizer.zero_grad()
        output, mu, logvar = model(img)
        loss = loss_function(output, img, mu, logvar)
        # ===================backward====================
        loss.backward()
        optimizer.step()
        # ===================mnist test==================
        out = mnist(output)
        
        _, pred = torch.max(out, 1)
        eval_acc += (pred == label).float().mean()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.4f}, Acc:{:.6f}'
          .format(epoch + 1, num_epochs, loss.item(),eval_acc/len(dataloader)))
        
    if epoch % 5 == 0:
        save = to_img(output[0:8].cpu().data)
        save_image(save, './vae_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './vae.pth')

epoch [1/100], loss:31882.8711, Acc:0.772160


KeyboardInterrupt: 