# CNN VAE for SRL for DIY Self driving car

In this notebook you will train a CNN VAE (beta-VAE) model. The resulting model is used for state representation learning (SRL) in reinforcement learning.

First, collect the training data. You can use `notebooks\utility\data_collection.ipynb` for this.

Collect images of the course while driving the car around it. Collect between 1k and 10k images, adjusting the amount to the size of the course. Vary the way you drive: along the center of the course, along the side lines, in a zigzag, and so on. During reinforcement-learning trials you cannot know in advance how the car will drive, so collect data that lets the model represent the course even when the car makes a mistake.


## Mount google drive

Upload a zip file containing the training data to Google Drive; it will be copied from there into the Colab runtime.
Set `DATASET_ZIP` to the name of that zip file.

In [None]:
from google.colab import drive

# File name of the training-data archive, looked up under "My Drive".
DATASET_ZIP = 'dataset.zip'

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

## Copy from google drive

Copy training data and unzip.

In [0]:

!cp '/content/drive/My Drive/$DATASET_ZIP' ./
!unzip -q DATASET_ZIP

## Resize dataset

In [0]:
import glob
import os

import PIL.Image  # importing the submodule guarantees PIL.Image is available

# Folder holding the extracted images. It has to be the folder that
# datasets.ImageFolder reads for training ('./roll').
# NOTE(review): assumes the working directory is /content and that the archive
# extracts into ./roll -- confirm against the actual zip layout.
IMAGE_ROOT = os.path.join('/content', 'roll')

# ImageFolder expects class sub-folders, so search the tree recursively.
files = glob.glob(os.path.join(IMAGE_ROOT, '**', '*.jpg'), recursive=True)
for f in files:
  try:
    # Image.open is lazy; resize() forces the decode, so a truncated file
    # raises here, inside the try block. The `with` closes the file handle
    # before the path is overwritten below.
    with PIL.Image.open(f) as image:
      resized = image.resize((160, 120))
  except OSError:
    # Unreadable image: remove it and move on, instead of falling through
    # with an undefined (or stale) image.
    print('Delete' + f)
    os.remove(f)
    continue
  # Keep rows 40..119 only -> a 160x80 image, overwriting the original file.
  resized.crop((0, 40, 160, 120)).save(f, quality=95)

In [0]:
# PyTorch core, layer and functional APIs used by the model and training cells.
import torch
import torch.nn as nn
import torch.nn.functional as F
# torchvision: ImageFolder dataset, tensor transforms and image-saving helper.
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
# Inline display helpers.
# NOTE(review): `Image` is imported twice; the second import rebinds the name.
from IPython.display import Image
from IPython.core.display import Image, display

# Enable the autoreload extension in mode 2 so edited modules are re-imported
# before each cell runs.
%load_ext autoreload
%autoreload 2

In [0]:

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Load dataset



In [0]:
# Mini-batch size used by the DataLoader.
bs = 64

# Images are only converted to tensors; no other transform is applied here.
to_tensor = transforms.Compose([transforms.ToTensor()])
dataset = datasets.ImageFolder(root='./roll', transform=to_tensor)
dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=bs)

# Show (number of images, number of batches per epoch).
len(dataset.imgs), len(dataloader)

In [0]:
# Pull one mini-batch, write it out as an image file and display that file.
batch_iter = iter(dataloader)
fixed_x, _ = next(batch_iter)

preview_path = 'real_image.png'
save_image(fixed_x, preview_path)
Image(preview_path)

In [0]:
class Flatten(nn.Module):
    """Collapse every dimension after the batch dimension into a single one."""

    def forward(self, input):
        return input.view(input.size(0), -1)


class UnFlatten(nn.Module):
    """Reshape a flat ``(batch, size * 3 * 8)`` tensor to ``(batch, size, 3, 8)``.

    The 3 x 8 spatial size is what the ``VAE`` encoder in this cell produces
    for an 80 x 160 input.
    """

    def forward(self, input, size=256):
        return input.view(input.size(0), size, 3, 8)


class VAE(nn.Module):
    """Convolutional variational auto-encoder with a weighted KL term.

    The encoder is four stride-2 convolutions followed by ``Flatten``; the
    decoder is ``UnFlatten`` followed by four stride-2 transposed convolutions
    and a sigmoid. With the default ``h_dim=6144`` (256 * 3 * 8) the network
    expects inputs of shape ``(batch, image_channels, 80, 160)`` and returns
    reconstructions of the same shape with values in [0, 1].

    Args:
        image_channels: number of channels of the input images.
        h_dim: size of the flattened encoder output.
        z_dim: size of the latent vector.
    """

    def __init__(self, image_channels=3, h_dim=6144, z_dim=32):
        super().__init__()
        self.z_dim = z_dim
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),
            nn.ReLU(),
            Flatten()
        )

        self.fc1 = nn.Linear(h_dim, z_dim)  # latent mean
        self.fc2 = nn.Linear(h_dim, z_dim)  # latent log-variance (pre-softplus)
        self.fc3 = nn.Linear(z_dim, h_dim)  # latent -> decoder input

        self.decoder = nn.Sequential(
            UnFlatten(),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            # kernel_size=5 here restores the odd 39 x 79 feature-map size of
            # the first encoder layer so the last layer ends at 80 x 160.
            nn.ConvTranspose2d(64, 32, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, kernel_size=4, stride=2),
            nn.Sigmoid(),
        )

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + std * eps`` with ``eps ~ N(0, I)``.

        The noise is created with ``torch.randn_like`` so it takes its device
        and dtype from the inputs; the model no longer depends on a
        notebook-global ``device`` variable.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + std * eps

    def bottleneck(self, h):
        """Map encoder features ``h`` to ``(z, mu, logvar)``."""
        mu = self.fc1(h)
        # NOTE(review): softplus keeps logvar positive, i.e. the posterior
        # variance is always above 1 -- confirm this constraint is intended.
        logvar = F.softplus(self.fc2(h))
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar

    def encode(self, x):
        """Encode images ``x`` and return ``(z, mu, logvar)``."""
        h = self.encoder(x)
        return self.bottleneck(h)

    def decode(self, z):
        """Decode latent vectors ``z`` into images."""
        h = self.fc3(z)
        return self.decoder(h)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for a batch of images."""
        z, mu, logvar = self.encode(x)
        reconst = self.decode(z)
        return reconst, mu, logvar

    def loss_fn(self, images, reconst, mean, logvar, beta=5.0):
        """Summed binary cross-entropy plus ``beta`` times the KL term.

        Args:
            images: target images with values in [0, 1].
            reconst: reconstructions, same number of elements as ``images``.
            mean: latent means, shape ``(batch, z_dim)``.
            logvar: latent log-variances, shape ``(batch, z_dim)``.
            beta: weight of the KL term (default 5.0, the previously
                hard-coded value).

        Returns:
            Scalar loss tensor.
        """
        # The KL term is summed over the batch axis (dim=0) and then averaged
        # over the latent dimensions, exactly as before.
        KL = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=0)
        KL = torch.mean(KL)
        # Flatten per sample instead of assuming 38400 (= 3 * 80 * 160) values
        # per image; with reduction='sum' the result is the same.
        batch = images.size(0)
        reconstruction = F.binary_cross_entropy(
            reconst.reshape(batch, -1), images.reshape(batch, -1), reduction='sum')
        return reconstruction + beta * KL

## Prepare Training

Create VAE model and initialize optimizer.

In [0]:
from torchsummary import summary

# Size of the latent vector z.
VARIANTS_SIZE = 32
# Channel count taken from the preview batch loaded earlier.
image_channels = fixed_x.size(1)

# Build the model, move it to the selected device and attach an Adam optimizer.
vae = VAE(z_dim=VARIANTS_SIZE, image_channels=image_channels)
vae = vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)

# Print a per-layer summary for a 3 x 80 x 160 input.
summary(vae, (3, 80, 160))

## Tensorboard



In [0]:
# Load the TensorBoard notebook extension and open a dashboard on ./runs,
# the directory SummaryWriter() logs to by default.
%load_ext tensorboard
%tensorboard --logdir ./runs

## Training

In [None]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np

# Number of passes over the dataset.
# NOTE(review): `epochs` was not defined anywhere in the notebook; 100 is a
# placeholder -- tune it to the dataset size and the loss curve.
epochs = 100

writer = SummaryWriter()

vae.train()
for epoch in range(epochs):
    losses = []
    for images, _ in dataloader:
        images = images.to(device)
        optimizer.zero_grad()
        recon_images, mu, logvar = vae(images)
        loss = vae.loss_fn(images, recon_images, mu, logvar)
        loss.backward()
        optimizer.step()
        # .item() yields a plain Python float detached from the graph.
        losses.append(loss.item())
    # Only the last batch of the epoch is logged, so build the grid once here
    # instead of on every iteration.
    grid = torchvision.utils.make_grid(recon_images.detach())
    mean_loss = np.average(losses)
    writer.add_image('Image/reconst', grid, epoch)
    writer.add_scalar('Loss/train', mean_loss, epoch)
    print("EPOCH: {} loss: {}".format(epoch+1, mean_loss))

# Flush pending events to disk before saving the weights.
writer.close()

torch.save(vae.state_dict(), 'vae.torch')

In [None]:
## Cleanup

Copy the trained model file to Google Drive.

In [0]:
# Copy the saved state dict (vae.torch) to the top level of the mounted Drive.
!cp vae.torch '/content/drive/My Drive/vae.torch'