<a href="https://colab.research.google.com/github/vedvkandge2000/Deep-Learning-Project/blob/master/Variational_Autoencoders_in_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import save_image

In [2]:
# Hyperparameters

image_size = 784   # passed to nn.Linear and x.view(-1, image_size): a flattened 28 x 28 image
hidden_dim = 400
latent_dim = 20
batch_size = 128
epochs = 30
seed = 42

# Seed PyTorch's global RNG so weight initialisation, batch shuffling and latent
# sampling are repeatable after Restart & Run All (GPU kernels may still add
# small nondeterminism).
torch.manual_seed(seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MNIST dataset
data_root = '/content/data'

train_dataset = torchvision.datasets.MNIST(root=data_root,
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# download=True is a no-op when the files are already present; it keeps this
# statement from depending on the training download having run first.
test_dataset = torchvision.datasets.MNIST(root=data_root,
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# Evaluation does not need shuffling: the average loss is order-independent, and
# a fixed order means the first batch (used for the reconstruction snapshot)
# shows the same digits every epoch.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Directory for the reconstructed and sampled images; created if it does not exist yet.
sample_dir = "result"
os.makedirs(sample_dir, exist_ok=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/data/MNIST/raw/train-images-idx3-ubyte.gz to /content/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/data/MNIST/raw/train-labels-idx1-ubyte.gz to /content/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/data/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/data/MNIST/raw
Processing...
Done!




In [3]:
# VAE model

class VAE(nn.Module):
  """Fully connected variational autoencoder.

  Encoder: image_size -> hidden_dim -> (mean, log-variance), each of size latent_dim.
  Decoder: latent_dim -> hidden_dim -> image_size, squashed by a sigmoid.
  The layer sizes are read from the module-level image_size, hidden_dim and
  latent_dim when the model is constructed.
  """

  def __init__(self):
    super().__init__()
    # Encoder: shared hidden layer followed by two heads.
    self.fc1 = nn.Linear(image_size, hidden_dim)
    self.fc2_mean = nn.Linear(hidden_dim, latent_dim)
    self.fc2_logvar = nn.Linear(hidden_dim, latent_dim)
    # Decoder.
    self.fc3 = nn.Linear(latent_dim, hidden_dim)
    self.fc4 = nn.Linear(hidden_dim, image_size)

  def encode(self, x):
    """Return (mean, log-variance) of the approximate posterior for x."""
    hidden = F.relu(self.fc1(x))
    return self.fc2_mean(hidden), self.fc2_logvar(hidden)

  def reparameterize(self, mu, logvar):
    """Draw z = mu + eps * std with eps ~ N(0, I), keeping z differentiable w.r.t. mu and logvar."""
    # std = exp(logvar / 2) because logvar stores log(sigma^2).
    std = (0.5 * logvar).exp()
    noise = torch.randn_like(std)
    return mu + noise * std

  def decode(self, z):
    """Map latent codes z to values in (0, 1) via the sigmoid output layer."""
    return torch.sigmoid(self.fc4(F.relu(self.fc3(z))))

  def forward(self, x):
    """Flatten x to rows of image_size values, encode, sample a latent code and decode it.

    Returns the tuple (reconstructed, mu, logvar).
    """
    flat = x.view(-1, image_size)
    mu, logvar = self.encode(flat)
    latent = self.reparameterize(mu, logvar)
    return self.decode(latent), mu, logvar

# Build the VAE on the selected device and attach an Adam optimizer to its parameters.

model = VAE()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)




In [4]:
# Define loss function

def loss_function(reconstructed_image, original_image, mu, logvar):
  """Return the VAE loss: summed reconstruction BCE plus the KL divergence term.

  Args:
    reconstructed_image: decoder output with values in [0, 1].
    original_image: target images with the same number of elements as
      reconstructed_image; reshaped to match it.
    mu: mean of the approximate posterior.
    logvar: log-variance of the approximate posterior.

  Returns:
    Scalar tensor, summed (not averaged) over every element of the batch.
  """
  # Follow the reconstruction's shape instead of hard-coding 784 so the loss works
  # for any image size; reshape also accepts non-contiguous inputs.
  target = original_image.reshape(reconstructed_image.shape)
  bce = F.binary_cross_entropy(reconstructed_image, target, reduction='sum')
  # KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * sum(sigma^2 + mu^2 - 1 - log(sigma^2))
  kld = 0.5 * torch.sum(logvar.exp() + mu.pow(2) - 1 - logvar)
  return bce + kld

# Train function
def train(epoch):
  """Run one optimisation pass over train_loader and print progress.

  Prints the per-sample loss of every 100th batch and, at the end, the loss
  averaged over the whole training dataset. `epoch` is only used for logging.
  """
  model.train()
  running_loss = 0
  num_batches = len(train_loader)
  for step, (images, _labels) in enumerate(train_loader):
    images = images.to(device)

    # Clear gradients left over from the previous step before the new backward pass.
    optimizer.zero_grad()
    reconstructed, mu, logvar = model(images)
    loss = loss_function(reconstructed, images, mu, logvar)
    loss.backward()
    optimizer.step()

    batch_loss = loss.item()
    running_loss += batch_loss

    if step % 100 == 0:
      # loss is summed over the batch, so divide by the batch length to report a per-sample value.
      print("Train Epoch {} [Batch {}/{}]\tLoss: {:.3f}".format(epoch, step, num_batches, batch_loss/len(images)))

  average_loss = running_loss/len(train_loader.dataset)
  print('=====> Epoch {} Average Loss: {:.3f}'.format(epoch, average_loss))

#Test function
def test(epoch):
  """Evaluate the model on test_loader and save a reconstruction snapshot.

  Prints the loss averaged over the whole test dataset. For the first batch,
  writes an image grid with up to five originals on the top row and their
  reconstructions below to `<sample_dir>/reconstruction_<epoch>.png`.
  """
  model.eval()
  test_loss = 0
  with torch.no_grad():
    for batch_idx, (images, _) in enumerate(test_loader):
      images = images.to(device)
      reconstructed, mu, logvar = model(images)
      test_loss += loss_function(reconstructed, images, mu, logvar).item()

      if batch_idx == 0:
        # Reshape to the input's own shape rather than assuming the batch holds
        # exactly batch_size items of 1 x 28 x 28.
        n = min(5, images.size(0))
        comparison = torch.cat([images[:n], reconstructed.view(images.shape)[:n]])
        # Write into sample_dir (the directory this notebook creates) instead of
        # a hard-coded absolute path.
        out_path = os.path.join(sample_dir, 'reconstruction_' + str(epoch) + '.png')
        save_image(comparison.cpu(), out_path, nrow=n)

  print('=====>Average Test Loss: {:.3f}'.format(test_loss/len(test_loader.dataset)))


In [5]:
# Main loop: train and evaluate each epoch, then save a grid of freshly generated digits.
num_samples = 64  # images per generated grid

for epoch in range(1, epochs + 1):
  train(epoch)
  test(epoch)
  with torch.no_grad():
    # Skip the encoder: sample z from a standard Gaussian and feed it to the
    # decoder to generate new images. The width comes from latent_dim so it
    # always matches the configured latent size.
    sample = torch.randn(num_samples, latent_dim, device=device)
    generated = model.decode(sample).cpu()
    # Write into sample_dir (the directory this notebook creates) instead of a
    # hard-coded absolute path.
    out_path = os.path.join(sample_dir, 'sample_' + str(epoch) + '.png')
    save_image(generated.view(-1, 1, 28, 28), out_path)

Train Epoch 1 [Batch 0/469]	Loss: 548.220
Train Epoch 1 [Batch 100/469]	Loss: 186.281
Train Epoch 1 [Batch 200/469]	Loss: 147.330
Train Epoch 1 [Batch 300/469]	Loss: 145.080
Train Epoch 1 [Batch 400/469]	Loss: 136.225
=====> Epoch 1 Average Loss: 165.047
=====>Average Test Loss: 127.433
Train Epoch 2 [Batch 0/469]	Loss: 130.643
Train Epoch 2 [Batch 100/469]	Loss: 125.309
Train Epoch 2 [Batch 200/469]	Loss: 122.150
Train Epoch 2 [Batch 300/469]	Loss: 123.412
Train Epoch 2 [Batch 400/469]	Loss: 120.163
=====> Epoch 2 Average Loss: 121.690
=====>Average Test Loss: 115.797
Train Epoch 3 [Batch 0/469]	Loss: 120.847
Train Epoch 3 [Batch 100/469]	Loss: 116.365
Train Epoch 3 [Batch 200/469]	Loss: 111.753
Train Epoch 3 [Batch 300/469]	Loss: 109.552
Train Epoch 3 [Batch 400/469]	Loss: 112.095
=====> Epoch 3 Average Loss: 114.555
=====>Average Test Loss: 112.527
Train Epoch 4 [Batch 0/469]	Loss: 111.040
Train Epoch 4 [Batch 100/469]	Loss: 114.744
Train Epoch 4 [Batch 200/469]	Loss: 111.453
Train 

In [6]:
# In this notebook we generated MNIST-style digit images using a VAE.
# First we trained the model on the MNIST dataset, then fed random values drawn from a standard normal distribution to the decoder to obtain new digit images.