#### Convolutional Variational Autoencoder for Generating Face Images

A convolutional VAE is used because the inputs are images.

In [1]:
import torch 
import torch.nn as nn 
import torchvision 
import time 
import matplotlib.pyplot as plt 

Helper function imports (not all might be used)

In [2]:
from helper_data import get_dataloaders_celeba
from helper_data import UnNormalize
from helper_train import train_vae_v1
from helper_utils import set_deterministic, set_all_seeds
from helper_plotting import plot_accuracy, plot_training_loss
from helper_plotting import plot_generated_images
from helper_plotting import plot_latent_space_with_labels
from helper_plotting import plot_images_sampled_from_vae

Imports for dataset

In [3]:
from torchvision import datasets 
from torch.utils.data import DataLoader 

Settings for the experiment/model
- device 
- random_seed set up 
- Learning Rate 
- num_epochs 
- batch size 

In [4]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

# Experiment settings shared by the cells below.
RANDOM_SEED = 123        # seed passed to set_all_seeds
LEARNING_RATE = 0.0005   # learning rate given to the Adam optimizer
NUM_EPOCHS = 50          # number of epochs requested from the training helper
BATCH_SIZE = 256         # batch size used for every DataLoader


cuda


In [5]:
# Call the helper: a bare `set_deterministic` only names the function object and runs nothing.
# NOTE(review): presumably this switches PyTorch/cuDNN to deterministic algorithms; if it enables
# torch.use_deterministic_algorithms, CUDA runs may also need CUBLAS_WORKSPACE_CONFIG set — confirm
# against helper_utils.
set_deterministic()
# Seed the random number generators from the shared constant instead of a repeated literal.
set_all_seeds(RANDOM_SEED)

##### Load the dataset

In [6]:
def get_data_celeba(batch_size, num_workers=0, train_transform= None, test_transform=None, download=True):
    """Create DataLoaders for the CelebA train, validation and test splits.

    The datasets are stored under the ``data`` directory. Only the training
    dataset is constructed with the ``download`` flag; the validation and test
    datasets are opened from the same root without it.

    Args:
        batch_size: number of samples per batch for all three loaders.
        num_workers: worker count forwarded to every DataLoader.
        train_transform: transform for the training split; ``ToTensor()`` when None.
        test_transform: transform for the validation and test splits;
            ``ToTensor()`` when None.
        download: forwarded to the training dataset constructor.

    Returns:
        ``(train_loader, valid_loader, test_loader)``. Only the training
        loader shuffles.

    Labels (ground truth) are 40-dim vectors representing
    00 - 5_o_Clock_Shadow
    01 - Arched_Eyebrows
    02 - Attractive
    03 - Bags_Under_Eyes
    04 - Bald
    05 - Bangs
    06 - Big_Lips
    07 - Big_Nose
    08 - Black_Hair
    09 - Blond_Hair
    10 - Blurry
    11 - Brown_Hair
    12 - Bushy_Eyebrows
    13 - Chubby
    14 - Double_Chin
    15 - Eyeglasses
    16 - Goatee
    17 - Gray_Hair
    18 - Heavy_Makeup
    19 - High_Cheekbones
    20 - Male
    21 - Mouth_Slightly_Open
    22 - Mustache
    23 - Narrow_Eyes
    24 - No_Beard
    25 - Oval_Face
    26 - Pale_Skin
    27 - Pointy_Nose
    28 - Receding_Hairline
    29 - Rosy_Cheeks
    30 - Sideburns
    31 - Smiling
    32 - Straight_Hair
    33 - Wavy_Hair
    34 - Wearing_Earrings
    35 - Wearing_Hat
    36 - Wearing_Lipstick
    37 - Wearing_Necklace
    38 - Wearing_Necktie
    39 - Young
    """
    # Fall back to a plain tensor conversion when no transform is supplied.
    train_transform = torchvision.transforms.ToTensor() if train_transform is None else train_transform
    test_transform = torchvision.transforms.ToTensor() if test_transform is None else test_transform

    # Training split: the only one that receives the download flag.
    train_dataset = datasets.CelebA(root="data", split="train", transform=train_transform, download=download)
    # Validation and test splits share the evaluation transform.
    eval_datasets = [
        datasets.CelebA(root="data", split=split_name, transform=test_transform)
        for split_name in ("valid", "test")
    ]

    # Settings common to all three loaders.
    shared_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **shared_kwargs)
    valid_loader, test_loader = [
        DataLoader(dataset=eval_dataset, shuffle=False, **shared_kwargs)
        for eval_dataset in eval_datasets
    ]

    return train_loader, valid_loader, test_loader

In [7]:
# transform face images for processing 
custom_transforms= torchvision.transforms.Compose(
    [
        torchvision.transforms.CenterCrop((128,128)), # crop the center to 128x128 pixels
        torchvision.transforms.ToTensor()   # transform all the image pixels into [0,1]
    ]
)

train_loader, valid_loader, test_loader=get_data_celeba(BATCH_SIZE, num_workers=2, train_transform=custom_transforms,test_transform=custom_transforms)


Files already downloaded and verified


Data Dimensions: 
- Each image is a colored (3 channels) 128 x 128 image.
- Batch_size is 256 for dataloaders, and there are 636 total batches in train_loader 

In [8]:
# Show the shapes and one sample label from the first batch of the training and test loaders.
for split_title, split_loader in (("Training Data", train_loader), ("Test Data ", test_loader)):
    print(split_title)
    for image, labels in split_loader:
        print("Image Dimension: ", image.shape)
        print("Label Dimension: ", labels.shape)
        print(labels[0])    # one sample label vector
        break               # only the first batch is inspected

print("Total Training Batches: ", len(train_loader))

Training Data
Image Dimension:  torch.Size([256, 3, 128, 128])
Label Dimension:  torch.Size([256, 40])
tensor([0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
        1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1])
Test Data 
Image Dimension:  torch.Size([256, 3, 128, 128])
Label Dimension:  torch.Size([256, 40])
tensor([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
Total Training Batches:  636


#### Model
- Variational autoencoder whose encoder is a stack of convolutional layers with non-linear activation functions
- Two linear layers produce the mean and log-variance vectors, which complete the encoding into a latent space of 200 dimensions
- The decoder uses transposed convolutions to go back to the original image dimensions


In [9]:
class Reshape(nn.Module):
    """Module form of ``Tensor.view`` so a reshape can be placed inside ``nn.Sequential``."""

    def __init__(self, *args):
        super().__init__()
        # Target shape, kept as the tuple of sizes passed to the constructor.
        self.shape = args

    def forward(self, x):
        """Return ``x`` viewed with the stored target shape."""
        target_shape = self.shape
        return x.view(target_shape)
    
class Trim(nn.Module):
    """Crop the last two indexed dimensions of a (batch, channels, H, W) tensor.

    Keeps the top-left ``height`` x ``width`` region, i.e. indices
    ``[:height]`` along dimension 2 and ``[:width]`` along dimension 3.
    Inputs that are already smaller than the target pass through unchanged,
    because slicing never pads.

    Args:
        height: maximum size kept along dimension 2 (default 128).
        width: maximum size kept along dimension 3 (default 128).
    """

    def __init__(self, height=128, width=128):
        super().__init__()
        # The defaults reproduce the previously hard-coded 128 x 128 crop.
        self.height = height
        self.width = width

    def forward(self, x):
        """Return ``x`` sliced to at most ``height`` x ``width`` in dimensions 2 and 3."""
        return x[:, :, :self.height, :self.width]

class VAE(nn.Module):
    """Convolutional variational autoencoder for batch x 3 x 128 x 128 images.

    Encoder: four stride-2 convolutions with LeakyReLU, flattened to a
    4096-dimensional vector. Two linear layers map that vector to the mean and
    the log-variance of a 200-dimensional latent distribution.

    Decoder: a linear layer back to 4096 features, reshaped to 64 x 8 x 8,
    followed by four stride-2 transposed convolutions, a trim to 128 x 128 and
    a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # input dimension is batch x 3 x 128 x 128
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, stride=2, kernel_size=3, padding=1),   # batch x 32 x 64 x 64
            nn.LeakyReLU(0.1, inplace=False),    # element-wise LeakyReLU

            nn.Conv2d(32, 64, stride=2, kernel_size=3, padding=1),  # batch x 64 x 32 x 32
            nn.LeakyReLU(0.1, inplace=False),

            nn.Conv2d(64, 64, stride=2, kernel_size=3, padding=1),  # batch x 64 x 16 x 16
            nn.LeakyReLU(0.1, inplace=False),

            nn.Conv2d(64, 64, stride=2, kernel_size=3, padding=1),  # batch x 64 x 8 x 8
            nn.LeakyReLU(0.1, inplace=False),

            # flatten into a single vector per sample for the linear layers
            nn.Flatten()    # batch x 4096  (64 * 8 * 8)
        )
        # linear heads producing the latent mean and log-variance vectors
        self.z_mean = nn.Linear(4096, 200)
        self.z_log_var = nn.Linear(4096, 200)

        # decoder
        self.decoder = nn.Sequential(
            # start from the 200-dimensional latent vector
            nn.Linear(200, 4096),
            # reshape the vector into batch x 64 x 8 x 8
            Reshape(-1, 64, 8, 8),

            nn.ConvTranspose2d(64, 64, kernel_size=3, stride=2),             # batch x 64 x 17 x 17
            nn.LeakyReLU(0.1, inplace=False),

            nn.ConvTranspose2d(64, 64, kernel_size=3, stride=2, padding=1),  # batch x 64 x 33 x 33
            nn.LeakyReLU(0.1, inplace=False),

            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1),  # batch x 32 x 65 x 65
            nn.LeakyReLU(0.1, inplace=False),

            nn.ConvTranspose2d(32, 3, kernel_size=3, stride=2, padding=1),   # batch x 3 x 129 x 129
            Trim(),     # cut off the extra row/column to get batch x 3 x 128 x 128
            # squash the outputs into (0, 1)
            nn.Sigmoid()
        )

    def reparameterize(self, z_mu, z_var_log):
        """Sample a latent vector with the reparameterization trick.

        Computes ``z = z_mu + eps * exp(z_var_log / 2)`` with ``eps`` drawn
        from a standard normal distribution.

        Args:
            z_mu: latent means.
            z_var_log: latent log-variances, same shape as ``z_mu``.

        Returns:
            The sampled latent tensor, same shape as ``z_mu``.
        """
        # Draw the noise with z_mu's own shape, dtype and device rather than
        # relying on a notebook-level DEVICE global; this also avoids creating
        # the tensor on the CPU and copying it over.
        eps = torch.randn_like(z_mu)
        z = z_mu + eps * torch.exp(z_var_log / 2.)
        return z

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            ``(encoded, z_mean, z_var_log, decoded)``: the sampled latent
            vector, the latent mean, the latent log-variance and the
            reconstruction.
        """
        x = self.encoder(x)
        z_mean, z_var_log = self.z_mean(x), self.z_log_var(x)
        encoded = self.reparameterize(z_mean, z_var_log)
        decoded = self.decoder(encoded)
        return encoded, z_mean, z_var_log, decoded

In [10]:
# Re-seed immediately before the model is constructed.
set_all_seeds(RANDOM_SEED)

# Build the VAE and move its parameters to the selected device.
model = VAE().to(DEVICE)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
)

In [None]:
# Train the VAE; whatever the helper returns is kept in `log_dict`.
# NOTE(review): the meaning of `skip_epoch_stats` and `logging_interval` is defined in
# helper_train — confirm there.
log_dict = train_vae_v1(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    logging_interval=50,
    skip_epoch_stats=True,
)