# Conditional Deep Convolutional Generative Adversarial Network (CDCGAN) 

In [2]:
#Import the libraries we will need.
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import tensorflow.contrib.slim as slim
import os
import scipy.misc
import scipy
from sklearn.utils import shuffle


We will be using the MNIST dataset. input_data is a library that downloads the dataset and uzips it automatically. It can be acquired Github here: https://gist.github.com/awjuliani/1d21151bc17362bf6738c3dc02f37906

In [18]:
DATA_SIZE = 50000
IMG_TO_SHOW = 6
IMG_CHN = 1
TRAIN_SIZE = 4
TRAIN_BATCH_SIZE = 4
G_LR = 1e-5
D_LR = 1e-5
ITER = 2500
MEMORY_FACTOR = 500

In [12]:


class MNISTDataloader:
    def __init__(self, train_data_path, visualization = False):
        train = np.load(train_data_path)
        self.train_X, self.train_y = shuffle(train['x'][:TRAIN_SIZE], train['y'][:TRAIN_SIZE], random_state = 77)
        self.test_X, self.test_y = shuffle(train['x'][TRAIN_SIZE:], train['y'][TRAIN_SIZE:], random_state = 77)
        self.train_batch_ind = 0
        self.test_batch_ind = 0
        
        if (visualization):
            print("train X size: {}".format(self.train_X.shape))
            print("train y size: {}".format(self.train_y.shape))
            print("test X size: {}".format(self.test_X.shape))
            print("test y size: {}".format(self.test_y.shape))
            for i in [0, 1, 2]:
                plt.matshow(train_X[i], cmap=plt.get_cmap('gray'))
                plt.title("" + str(i + 1) + "th Training Data ")
                plt.matshow(train_y[i], cmap=plt.get_cmap('gray'))
                plt.title("" + str(i + 1) + "th Ground Truth ")


    def next_batch(self, batch_size = 32, mode = "train"):
        out = None
        if mode == "train":
            out= [self.train_X[batch_size* self.train_batch_ind : batch_size * (self.train_batch_ind+1)] , 
                     self.train_y[batch_size* self.train_batch_ind : batch_size * (self.train_batch_ind+1)]]
            self.train_batch_ind += 1
            if (self.train_batch_ind +1)* batch_size >= TRAIN_SIZE:
                self.train_batch_ind = 0
        elif mode == "test":
            out= [self.test_X[batch_size* self.train_batch_ind : batch_size * (self.train_batch_ind+1)] , 
                     self.test_y[batch_size* self.train_batch_ind : batch_size * (self.train_batch_ind+1)]]
            self.test_batch_ind += 1
            if (self.test_batch_ind+1) * batch_size >= DATA_SIZE -TRAIN_SIZE:
                self.test_batch_ind = 0
        else:
            raise("Mode error.")
        
        return out
    
dl = MNISTDataloader("./mnist_inpainting.npz", visualization = False)


### Helper Functions

In [4]:
#This function performns a leaky relu activation, which is needed for the discriminator network.
NUM_LABELS = 47
rnd = np.random.RandomState(123)
tf.set_random_seed(123)
batch_size = 128

def lrelu(x, leak=0.2, name="lrelu"):
     with tf.variable_scope(name):
         f1 = 0.5 * (1 + leak)
         f2 = 0.5 * (1 - leak)
         return f1 * x + f2 * abs(x)
    
#The below functions are taken from carpdem20's implementation https://github.com/carpedm20/DCGAN-tensorflow
#They allow for saving sample images from the generator to follow progress
def save_images(images, size, image_path):
    return imsave(inverse_transform(images), size, image_path)

def imsave(images, size, path):
    return scipy.misc.imsave(path, merge(images, size))

def inverse_transform(images):
    return (images+1.)/2.

def merge(images, size):
    h, w = images.shape[1], images.shape[2]
    img = np.zeros((h * size[0], w * size[1]))

    for idx, image in enumerate(images):
        i = idx % size[1]
        j = idx // size[1]
        img[j*h:j*h+h, i*w:i*w+w] = image

    return img

def upsample(input, name, factor=[2,2]):
    size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
    with tf.name_scope(name):
        out = tf.image.resize_nearest_neighbor(input, size=size, align_corners=False, name=None)
        return out

## Defining the Adversarial Networks

### CAE as image feature extractor for condition input

In [5]:
def cae_encoder(x, reuse = False):
    
    c1 = slim.convolution2d(x, 32, [3,3], stride=[1,1], padding="SAME",\
        biases_initializer=None,activation_fn=lrelu,\
        reuse=reuse,scope='conv1')
    
    p1 = slim.max_pool2d(c1, [2, 2], scope='pool1')
    
    c2 = slim.convolution2d(p1, 64, [3,3], stride=[2,2], padding="SAME",\
        biases_initializer=None,activation_fn=lrelu,\
        reuse=reuse,scope='conv2')
    
    c3 = slim.convolution2d(c2, 64, [3,3], stride=[1,1], padding="SAME",\
        biases_initializer=None,activation_fn=lrelu,\
        reuse=reuse,scope='conv3')

    e_out = slim.max_pool2d(c3, [2, 2], scope='pool2')
    
    return e_out

def cae_decoder(f, reuse = False):
    
    up1 = upsample(f, name = "up1")

    dc_1 = slim.convolution2d_transpose(\
        up1,num_outputs=64, kernel_size=[3,3],stride=[1,1],\
        padding="SAME",normalizer_fn=slim.batch_norm,\
        activation_fn=tf.nn.relu,scope='dconv_1' )

    dc_2 = slim.convolution2d_transpose(\
        dc_1, num_outputs=32, kernel_size=[3,3],stride=[2,2],\
        padding="SAME",normalizer_fn=slim.batch_norm,\
        activation_fn=tf.nn.relu,scope='dconv_2' )


    up2 = upsample(dc_2, name = "up2")

    dc_3 = slim.convolution2d_transpose(\
        up2, num_outputs=IMG_CHN, kernel_size=[3,3],stride=[1,1],\
        padding="SAME",normalizer_fn=slim.batch_norm,\
        activation_fn=tf.nn.sigmoid,scope='dconv_3' )

    return dc_3



### Generator Network

The generator takes a vector of random numbers and transforms it into a 32x32 image. Each layer in the network involves a strided  transpose convolution, batch normalization, and rectified nonlinearity. Tensorflow's slim library allows us to easily define each of these layers.

In [6]:
def generator(z, c):
    
    with tf.variable_scope("gen"):
    
        zcP = tf.concat([z, c],1)

        g_in = slim.fully_connected(zcP,4*4*128,normalizer_fn= slim.batch_norm,\
            activation_fn=tf.nn.relu,scope='g_project')

        g_in_Con = tf.reshape(g_in,[-1,4,4,128])

        gen1 = slim.convolution2d_transpose(\
            g_in_Con,num_outputs=64,kernel_size=[5,5],stride=[2,2],\
            padding="SAME",normalizer_fn=slim.batch_norm,\
            activation_fn=tf.nn.relu,scope='g_conv1')

        gen2 = slim.convolution2d_transpose(\
            gen1,num_outputs=32,kernel_size=[5,5],stride=[2,2],\
            padding="SAME",normalizer_fn=slim.batch_norm,\
            activation_fn=tf.nn.relu,scope='g_conv2')

        gen3 = slim.convolution2d_transpose(\
            gen2,num_outputs=16,kernel_size=[5,5],stride=[2,2],\
            padding="SAME",normalizer_fn=slim.batch_norm,\
            activation_fn=tf.nn.relu,scope='g_conv3')

        g_out = slim.convolution2d_transpose(\
            gen3,num_outputs=1,kernel_size=[32,32],padding="SAME",\
            biases_initializer=None,activation_fn=tf.nn.tanh,\
            scope='g_out')

    return g_out

### Discriminator Network
The discriminator network takes as input a 32x32 image and transforms it into a single valued probability of being generated from real-world data. Again we use tf.slim to define the convolutional layers, batch normalization, and weight initialization.

In [7]:
def discriminator(img, c, reuse=False):
    
    with tf.variable_scope("dis"):

        dis1 = slim.convolution2d(img,16,[4,4],stride=[2,2],padding="SAME",\
            biases_initializer=None,activation_fn=lrelu,\
            reuse=reuse,scope='d_conv1')

        dis2 = slim.convolution2d(dis1,32,[4,4],stride=[2,2],padding="SAME",\
            normalizer_fn=slim.batch_norm,activation_fn=lrelu,\
            reuse=reuse,scope='d_conv2')

        dis3 = slim.convolution2d(dis2,32,[4,4],stride=[2,2],padding="SAME",\
            normalizer_fn=slim.batch_norm,activation_fn=lrelu,\
            reuse=reuse,scope='d_conv3')

        d_fc1 = slim.fully_connected(slim.flatten(dis1),512,activation_fn=tf.nn.relu,\
            reuse=reuse,scope='d_fc1')

        d_combined =  tf.concat([d_fc1, c],1)


        d_fc2 = slim.fully_connected(slim.flatten(d_combined),128,activation_fn=tf.nn.relu,\
            reuse=reuse,scope='d_fc2')

        d_out= slim.fully_connected(slim.flatten(d_combined),1,activation_fn=tf.nn.sigmoid,\
            reuse=reuse,scope='d_out')

    return d_out

### Connecting them together

In [19]:
tf.reset_default_graph()

z_size = 100 #Size of z vector used for generator.

#These two placeholders are used for input into the generator and discriminator, respectively.

z_in = tf.placeholder(shape=[None,z_size],dtype=tf.float32) #Random vector
c_in = tf.placeholder(shape = [None,32,32,IMG_CHN],dtype = tf.float32) # conditional input
real_in = tf.placeholder(shape=[None,32,32,IMG_CHN],dtype=tf.float32) #Real images

'''
Define GAN graph
'''
fx = cae_encoder(c_in)
model_directory = '../../CAE/models' #Directory to load trained model from.

saver = tf.train.Saver()

with tf.Session() as sess:  
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    #Reload CAE the model
    ckpt = tf.train.get_checkpoint_state(model_directory)
    saver.restore(sess,ckpt.model_checkpoint_path)
    print("CAE restored.")
    
c_in_flat = tf.layers.Flatten()(fx)
Gz = generator(z_in, c_in_flat) #Generates images from random z vectors
Dx = discriminator(real_in, c_in_flat) #Produces probabilities for real images
Dg = discriminator(Gz,c_in_flat,reuse=True) #Produces probabilities for generator images
pixel_wise_diff = (Gz - real_in)
#These functions together define the optimization objective of the GAN.
d_loss = -tf.reduce_mean(tf.log(Dx) + tf.log(1.-Dg)) #This optimizes the discriminator.
g_loss = -tf.reduce_mean(tf.log(Dg)) #This optimizes the generator.
gp_loss = tf.reduce_mean((Gz - real_in)**2)

dtvars = tf.trainable_variables(scope = "dis")
gtvars = tf.trainable_variables(scope = "gen")

#The below code is responsible for applying gradient descent to update the GAN.
trainerD = tf.train.AdamOptimizer(learning_rate=D_LR,beta1=0.5)
trainerG = tf.train.AdamOptimizer(learning_rate=G_LR,beta1=0.5)
d_grads = trainerD.compute_gradients(d_loss,dtvars) #Only update the weights for the discriminator network.
g_grads = trainerG.compute_gradients(g_loss + MEMORY_FACTOR*gp_loss, gtvars) #Only update the weights for the generator network.

update_D = trainerD.apply_gradients(d_grads)
update_G = trainerG.apply_gradients(g_grads)

INFO:tensorflow:Restoring parameters from ../../CAE/models\model-4000.cptk
CAE restored.


## Training the network


In [None]:
batch_size = TRAIN_BATCH_SIZE #Size of image batch to apply at each iteration.
iterations = ITER #Total number of iterations to use.
sample_directory = './figs' #Directory to save sample images from generator in.
model_directory = './models' #Directory to save trained model to.


saver = tf.train.Saver()

GAN_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="gen") + tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="dis")
gan_init = tf.variables_initializer(GAN_list)

with tf.Session() as sess:  
    sess.run(tf.global_variables_initializer())
    sess.run(gan_init)
    
    for i in range(iterations):
        
        zs = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32) #Generate a random z batch
        [xs, ys]= dl.next_batch(batch_size = batch_size) #Draw a sample batch from MNIST dataset.
        xs = np.reshape(xs,[batch_size,28,28,IMG_CHN])  #Transform it to be between -1 and 1
        xs = np.lib.pad(xs, ((0,0),(2,2),(2,2),(0,0)),'constant', constant_values=(-1, -1)) #Pad the images so the are 32x32
        ys = np.reshape(ys,[batch_size,28,28,IMG_CHN])  #Transform it to be between -1 and 1
        ys = np.lib.pad(ys, ((0,0),(2,2),(2,2),(0,0)),'constant', constant_values=(-1, -1)) #Pad the images so the are 32x32
        
        _,dLoss = sess.run([update_D,d_loss],feed_dict={z_in:zs, real_in:ys, c_in:xs }) #Update the discriminator
        _,gLoss = sess.run([update_G, g_loss + MEMORY_FACTOR*gp_loss],feed_dict={z_in:zs, real_in:ys, c_in:xs}) #Update the generator, twice for good measure.
        _,gLoss = sess.run([update_G, g_loss + MEMORY_FACTOR*gp_loss],feed_dict={z_in:zs, real_in:ys, c_in:xs})
        

        if i % 10 == 0:
            print("Gen Loss: " + str(gLoss) + " Disc Loss: " + str(dLoss))
            z2 = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32) #Generate another z batch
            newZ = sess.run(Gz,feed_dict={z_in:z2, c_in:xs}) #Use new z to get sample images from generator.
            if not os.path.exists(sample_directory):
                os.makedirs(sample_directory)
            #Save sample generator images for viewing training progress.
            save_images(np.reshape(newZ[0:IMG_TO_SHOW**2],[IMG_TO_SHOW**2,32,32]),[IMG_TO_SHOW,IMG_TO_SHOW],
                        sample_directory+'/fig'+str(i)+'.png') #change to 36
        if i % 1000 == 0 and i != 0:
            if not os.path.exists(model_directory):
                os.makedirs(model_directory)
            saver.save(sess,model_directory+'/model-'+str(i)+'.cptk')
            print("Saved Model")

Gen Loss: 58.274704 Disc Loss: 1.3864276


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Gen Loss: 47.414364 Disc Loss: 1.2976454
Gen Loss: 41.445053 Disc Loss: 1.1934519
Gen Loss: 35.834747 Disc Loss: 1.1710944
Gen Loss: 31.663208 Disc Loss: 1.1095582
Gen Loss: 29.250317 Disc Loss: 1.0621705
Gen Loss: 26.412277 Disc Loss: 1.0303127
Gen Loss: 24.301678 Disc Loss: 0.9928082
Gen Loss: 23.171516 Disc Loss: 0.955642
Gen Loss: 20.519938 Disc Loss: 0.92453855
Gen Loss: 20.14537 Disc Loss: 0.8881037
Gen Loss: 18.732708 Disc Loss: 0.85688305
Gen Loss: 18.446308 Disc Loss: 0.8317814
Gen Loss: 17.47867 Disc Loss: 0.8032989
Gen Loss: 16.685642 Disc Loss: 0.78876114
Gen Loss: 16.932745 Disc Loss: 0.75745714
Gen Loss: 15.757004 Disc Loss: 0.7475743
Gen Loss: 14.906557 Disc Loss: 0.71972936
Gen Loss: 14.573246 Disc Loss: 0.7251408
Gen Loss: 14.180852 Disc Loss: 0.7125781
Gen Loss: 13.522078 Disc Loss: 0.68323576
Gen Loss: 12.787437 Disc Loss: 0.69956726
Gen Loss: 13.031471 Disc Loss: 0.6744321
Gen Loss: 11.871908 Disc Loss: 0.674627
Gen Loss: 12.007313 Disc Loss: 0.6737809
Gen Loss: 11.

## Using a trained network
Once we have a trained model saved, we may want to use it to generate new images, and explore the representation it has learned.

In [None]:
sample_directory = './figs' #Directory to save sample images from generator in.
model_directory = './models' #Directory to load trained model from.
batch_size_sample = 36

init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as sess:  
    sess.run(init)
    #Reload the model.
    print 'Loading Model...'
    ckpt = tf.train.get_checkpoint_state(model_directory)
    saver.restore(sess,ckpt.model_checkpoint_path)
    
    zs = np.random.uniform(-1.0,1.0,size=[batch_size_sample,z_size]).astype(np.float32) #Generate a random z batch
    newZ = sess.run(Gz,feed_dict={z_in:z2}) #Use new z to get sample images from generator.
    if not os.path.exists(sample_directory):
        os.makedirs(sample_directory)
    save_images(np.reshape(newZ[0:batch_size_sample],[36,32,32]),[6,6],sample_directory+'/fig'+str(i)+'.png')