# Deep Convolutional Generative Adversarial Networks

[Deep Convolutional Generative Adversarial Networks](https://arxiv.org/abs/1511.06434) (DCGAN) is a model that is capable of learning reusable feature representations from large unlabeled image datasets. It helps bridge the gap between the success of CNNs for supervised learning and their use in unsupervised learning.

In this tutorial, we'll train a DCGAN model on the [LFW Face Dataset](http://vis-www.cs.umass.edu/lfw/) (Labeled Faces in the Wild). We'll see that a GAN can build a good image representation and that our model is capable of generating photo-realistic images of human faces.


In [None]:
from __future__ import print_function
import os
import matplotlib as mpl
import tarfile
from matplotlib import pyplot as plt

import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet.gluon import nn, utils
from mxnet import autograd
import numpy as np
import cv2

## Set Training parameters

In [None]:
# Number of passes over the training set made by the training loop.
epochs = 100
# Number of images per mini-batch; also the number of latent vectors sampled
# per training step.
batch_size = 32
# Length of the random noise vector z fed to the generator.
latent_z_size = 100
# List of devices the networks are initialized on and the batches are split
# across (CPU only here).
ctx = [mx.cpu()]

# Adam hyper-parameters shared by the generator and discriminator trainers.
lr = 0.0002
beta1 = 0.5

## Download and preprocess the LFW Face Dataset

In [None]:
# Location of the LFW (deep-funneled) archive and the local directory it is
# unpacked into.
lwf_url = 'http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz'
data_path = 'lwf_dataset'
if not os.path.isdir(data_path):
    os.makedirs(data_path)
# Key the download on the directory being empty rather than merely existing:
# a failed earlier attempt leaves an empty directory behind, and skipping the
# download in that case would silently produce an empty training set.
if not os.listdir(data_path):
    data_file = utils.download(lwf_url)
    with tarfile.open(data_file) as tar:
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use it with archives from a trusted source.
        tar.extractall(path=data_path)

We first resize the images to 64x64 pixels and then normalize the pixel values so that they lie between -1 and 1.

In [None]:
# Build the training iterator: every .jpg found under data_path is resized to
# target_wd x target_ht, scaled to [-1, 1] and stored channel-first.
target_wd = 64
target_ht = 64
img_list = []
for path, _, fnames in os.walk(data_path):
    for fname in fnames:
        if not fname.endswith('.jpg'):
            continue
        img = os.path.join(path, fname)
        img_arr = cv2.imread(img)
        if img_arr is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read or decoded; skip it rather than crash on `.shape`.
            print('skipping unreadable image: %s' % img)
            continue
        # resize image to 64*64; shrink with INTER_AREA, otherwise INTER_LINEAR
        ht, wd = img_arr.shape[:2]
        interpolation = cv2.INTER_AREA if ht > target_ht and wd > target_wd \
            else cv2.INTER_LINEAR
        # cv2.resize expects dsize as (width, height)
        resized_img = cv2.resize(img_arr, (target_wd, target_ht), interpolation=interpolation)
        # normalize image pixel value to between -1 and 1
        normalized_img = resized_img / 127.5 - 1
        # move the channel axis first: (height, width, channel) -> (channel, height, width)
        img_list.append(np.rollaxis(normalized_img, 2, 0))
if not img_list:
    raise RuntimeError('no readable .jpg images found under %s' % data_path)
train_data = mx.io.NDArrayIter(data=nd.array(img_list), batch_size=batch_size)

## Defining the networks

The core of the DCGAN architecture is a set of modifications to the standard CNN architecture:
* Replace any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).

* Use batchnorm in both the generator and the discriminator.

* Remove fully connected hidden layers for deeper architectures.

* Use ReLU activation in generator for all layers except for the output, which uses Tanh.

* Use LeakyReLU activation in the discriminator for all layers.

![alt text](img/dcgan.png "DCGAN Architecture")

In [None]:
# build the generator
# nc:  number of channels of the generated image
# ngf: base number of feature maps; hidden layers use multiples of it
nc = 3
ngf = 64
netG = nn.Sequential()
with netG.name_scope():
    # input is Z (a 1 x 1 spatial map), projected by a transposed convolution
    netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 4 x 4
    # Three identical upsampling stages; each doubles the spatial size:
    #   (ngf*4) x 8 x 8 -> (ngf*2) x 16 x 16 -> (ngf) x 32 x 32
    for channels in (ngf * 4, ngf * 2, ngf):
        netG.add(nn.Conv2DTranspose(channels, 4, 2, 1, use_bias=False))
        netG.add(nn.BatchNorm())
        netG.add(nn.Activation('relu'))
    # output stage: no batchnorm, tanh activation
    netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, use_bias=False))
    netG.add(nn.Activation('tanh'))
    # state size. (nc) x 64 x 64

# build the discriminator
# ndf: base number of feature maps; deeper layers use multiples of it
ndf = 64
netD = nn.Sequential()
with netD.name_scope():
    # input is (nc) x 64 x 64; the first stage has no batchnorm
    netD.add(nn.Conv2D(ndf, 4, 2, 1, use_bias=False))
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 32 x 32
    # Three identical downsampling stages; each halves the spatial size:
    #   (ndf*2) x 16 x 16 -> (ndf*4) x 8 x 8 -> (ndf*8) x 4 x 4
    for channels in (ndf * 2, ndf * 4, ndf * 8):
        netD.add(nn.Conv2D(channels, 4, 2, 1, use_bias=False))
        netD.add(nn.BatchNorm())
        netD.add(nn.LeakyReLU(0.2))
    # final 4 x 4 convolution without padding: 2 output channels on a 1 x 1 map
    netD.add(nn.Conv2D(2, 4, 1, 0, use_bias=False))

## Setup Loss Function and Optimizer
We use softmax cross entropy as the loss function and Adam as the optimizer. The parameters of both networks are initialized from a normal distribution.

In [None]:
# loss: softmax cross entropy over the discriminator's two outputs
loss = gluon.loss.SoftmaxCrossEntropyLoss()

# initialize the generator and the discriminator (in that order) with
# normally distributed weights on every context in ctx
for net in (netG, netD):
    net.initialize(mx.init.Normal(0.02), ctx=ctx)

# trainer for the generator and the discriminator; both use Adam with the
# same settings, each trainer receiving its own copy of the parameter dict
adam_params = {'learning_rate': lr, 'beta1': beta1}
trainerG = gluon.Trainer(netG.collect_params(), 'adam', dict(adam_params))
trainerD = gluon.Trainer(netD.collect_params(), 'adam', dict(adam_params))

## Training Loop

In [None]:
from datetime import datetime
import time
import logging

# Class-index targets for the two-output softmax head of netD: 1 marks a real
# image, 0 a generated one. split_and_load yields one label slice per context,
# matching how the data batches are split below.
real_label = utils.split_and_load(mx.nd.ones((batch_size,)), ctx)
fake_label = utils.split_and_load(mx.nd.zeros((batch_size,)), ctx)
metric = mx.metric.Accuracy()
stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
logging.basicConfig(level=logging.DEBUG)

# Running batch counter across all epochs (named `iteration` so the builtin
# `iter` is not shadowed).
iteration = 0
for epoch in range(epochs):
    tic = time.time()
    # An NDArrayIter is exhausted after one pass; rewind it so that every
    # epoch, not just the first one, actually iterates over the data.
    train_data.reset()
    for batch in train_data:
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        data = utils.split_and_load(batch.data[0], ctx)
        latent_z = utils.split_and_load(
            mx.nd.random_normal(0, 1, shape=(batch_size, latent_z_size, 1, 1)), ctx)

        with autograd.record():
            # train with real image
            # reshape((-1, 2)) flattens netD's output to one row of two
            # logits per sample, whatever the size of the slice is.
            outputs = [netD(data_slice).reshape((-1, 2)) for data_slice in data]
            errD_real = [loss(output, r_label_slice) for output, r_label_slice in zip(outputs, real_label)]
            metric.update(real_label, outputs)

            # train with fake image
            # detach() keeps the discriminator loss from back-propagating
            # into the generator's parameters.
            fakes = [netG(latent_z_slice) for latent_z_slice in latent_z]
            outputs = [netD(fake_slice.detach()).reshape((-1, 2)) for fake_slice in fakes]
            errD_fake = [loss(output, f_label_slice) for output, f_label_slice in zip(outputs, fake_label)]
            errD = [errD_r + errD_f for errD_r, errD_f in zip(errD_real, errD_fake)]
            for err in errD:
                err.backward()
            metric.update(fake_label, outputs)

        trainerD.step(batch.data[0].shape[0])

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        with autograd.record():
            # Run the generator again inside this recording scope: the
            # history recorded in step (1) is cleared by backward(), so
            # reusing the old fakes would leave netG without gradients for
            # this loss.
            fakes = [netG(latent_z_slice) for latent_z_slice in latent_z]
            outputs = [netD(fake_slice).reshape((-1, 2)) for fake_slice in fakes]
            # The generator is rewarded when its images are classified as real.
            errG = [loss(output, r_label_slice) for output, r_label_slice in zip(outputs, real_label)]
            for err in errG:
                err.backward()

        trainerG.step(batch.data[0].shape[0])

        name, acc = metric.get()
        logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d',
                     nd.mean(nd.concatenate(errD)).asscalar(),
                     nd.mean(nd.concatenate(errG)).asscalar(), acc, iteration, epoch)

        iteration = iteration + 1

    # Report the discriminator accuracy accumulated over the epoch, then
    # start the next epoch with a fresh metric.
    name, acc = metric.get()
    metric.reset()
    logging.info('\nbinary training acc at epoch %d: %s=%f', epoch, name, acc)
    logging.info('time: %f', time.time() - tic)


## Training loop

