In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
import os
import time
import numpy as np
import tensorflow as tf

class VAE():
    """Convolutional variational autoencoder expressed with the TensorFlow 1.x graph API.

    Typical use: ``vae = VAE(); vae.build(...)`` and then run ``vae.vae_loss``,
    ``vae.output`` or ``vae.random_sample_images`` inside a ``tf.Session``.

    Bookkeeping dictionaries:
        data_dict: pre-trained values keyed by layer name (read by the ``get_*`` helpers).
        para_dict: variables created while building the graph, keyed by layer name.
        net_shape: static output shape (as a list) of each named layer.
    """

    def __init__(self):
        super().__init__()
        # Create the dictionaries up front so the helper methods can be used
        # on a fresh instance without an AttributeError.
        self.data_dict = dict()
        self.para_dict = dict()
        self.net_shape = dict()

    def load(self, npy_path):
        """Load a pickled dictionary stored in a ``.npy`` file into ``self.data_dict``.

        :param npy_path: path of the ``.npy`` file holding a single dict object
        """
        # allow_pickle=True is required by NumPy >= 1.16.3 to read object arrays.
        # SECURITY: unpickling can execute arbitrary code - only load trusted files.
        self.data_dict = np.load(npy_path, encoding='latin1', allow_pickle=True).item()
        print("Load %s as self.data_dict" % npy_path)

    def build(self, n_dim=512, lambda_KL=1e-5, shape=(64,64,3)):
        """
        Build the full VAE graph: placeholders, encoder, sampler, decoder and losses.

        :param n_dim: size of the latent vector
        :param lambda_KL: weight of the KL term inside ``vae_loss``
        :param shape: (height, width, channels) of one input image

        Tensors exposed afterwards: ``x``, ``is_train``, ``random_sample``
        (placeholders), ``mean``, ``logvar``, ``sample``, ``output`` /
        ``output_image`` (reconstruction), ``recon_loss``, ``kl_loss``,
        ``vae_loss`` (all three are per-example vectors, not scalars) and
        ``random_sample_images`` (decoder applied to ``random_sample``).
        """
        # input information
        self.H, self.W, self.C = shape
        self.n_dim = n_dim
        self.lambda_KL = lambda_KL

        # parameter dictionary
        self.para_dict = dict()
        # NOTE(review): resetting data_dict here discards anything load() put
        # there before build(); kept because it is the original behaviour.
        self.data_dict = dict()
        self.net_shape = dict()

        # input placeholder
        self.x = tf.placeholder(tf.float32, [None, self.H, self.W, self.C])
        self.is_train = tf.placeholder(tf.bool)
        self.random_sample = tf.placeholder(tf.float32, [None, self.n_dim])

        # normalize inputs
        # self.x = self.x/255.0
        assert self.x.get_shape().as_list()[1:] == [self.H, self.W, self.C]

        # AUTO_REUSE lets the decoder be instantiated twice on shared variables.
        with tf.variable_scope("VAE",reuse=tf.AUTO_REUSE):
            self.mean, self.logvar = self.encoder(self.x)
            self.sample = self.sampler(self.mean, self.logvar)
            self.output_image = self.decoder(self.sample)
            # Alias: the training code fetches the reconstruction as `vae.output`.
            self.output = self.output_image

            # mean squared error over height, width and channels -> one value per example
            self.recon_loss = tf.reduce_mean(tf.square(tf.subtract(self.x, self.output_image)), [1,2,3])
            # 0.5 * mean over latent dims of (mean^2 + exp(logvar) - 1 - logvar)
            self.kl_loss = 0.5*tf.reduce_mean(tf.subtract(tf.add(tf.square(self.mean), tf.exp(self.logvar)), tf.add(1.0, self.logvar)),1)
            self.vae_loss = self.recon_loss + self.lambda_KL*self.kl_loss

            # Sampling from random z
            self.random_sample_images = self.decoder(self.random_sample)

    def encoder(self, input_image):
        """Map ``input_image`` to the latent ``(mean, logvar)`` pair.

        Four stride-2 4x4 conv+BN layers followed by two dense heads of size
        ``self.n_dim``. Records the flattened size in ``net_shape['flatten']``,
        which ``decoder`` reads.
        """
        # conv
        conv1 = self.conv_bn_layer(input_image, shape=(4,4,self.C,32), stride=2, name="conv1")
        conv2 = self.conv_bn_layer(conv1 , shape=(4,4,32,64), stride=2, name="conv2")
        conv3 = self.conv_bn_layer(conv2 , shape=(4,4,64,128), stride=2, name="conv3")
        conv4 = self.conv_bn_layer(conv3 , shape=(4,4,128,256), stride=2, name="conv4")
        flatten = self.flatten_layer(conv4, name='flatten')

        # mean and logvar
        mean = self.dense_layer(flatten, n_hidden=self.n_dim, name='mean')
        logvar = self.dense_layer(flatten, n_hidden=self.n_dim, name='logvar')
        return mean, logvar

    def sampler(self, mean, logvar):
        """Reparameterised sample: ``mean + exp(logvar / 2) * eps`` with standard-normal ``eps``."""
        eps = tf.random_normal(shape=tf.shape(mean))
        return mean + tf.exp(logvar / 2) * eps

    def decoder(self, sample_input):
        """Decode latent vectors into images rescaled from tanh range to [0, 1].

        Requires ``encoder`` to have run first (reads ``net_shape['flatten']``).
        The reshape to 4x4x256 followed by four stride-2 transposed convolutions
        produces 64x64 maps, so ``self.H``/``self.W`` are expected to be 64.
        """
        deconv_fc1 = self.dense_layer(sample_input, n_hidden=self.net_shape['flatten'][1], name='deconv_fc1')
        deconv_input = tf.reshape(deconv_fc1, shape=[-1, 4, 4, 256])

        # dynamic batch size: conv2d_transpose needs a full output shape
        batch_size = tf.shape(sample_input)[0]

        deconv1 = self.trans_conv_layer(bottom=deconv_input, shape=(4,4,128,256),
                                        output_shape=[batch_size, 8, 8, 128], stride=2, name='deconv1')
        deconv2 = self.trans_conv_layer(bottom=deconv1, shape=(4,4,64,128),
                                        output_shape=[batch_size, 16, 16, 64], stride=2, name='deconv2')
        deconv3 = self.trans_conv_layer(bottom=deconv2, shape=(4,4,32,64),
                                        output_shape=[batch_size, 32, 32, 32], stride=2, name='deconv3')
        output = self.trans_conv_layer(bottom=deconv3, shape=(4,4,self.C,32),
                                        output_shape=[batch_size, self.H, self.W, self.C], activation='tanh', stride=2, name='deconv_output')
        # tanh gives [-1, 1]; shift/scale into [0, 1]
        return (output/2) + 0.5

    def dense_layer(self, bottom, n_hidden=None, name=None):
        """Fully connected layer ``bottom @ W + b`` (no activation).

        :param bottom: 2-D input tensor
        :param n_hidden: number of output units; if None, pre-trained values
            stored under ``name`` in ``data_dict`` are used instead
        :param name: layer name, used for variable names and dictionary keys
        :raises ValueError: when ``n_hidden`` is None and ``name`` has no pre-trained values
        """
        bottom_shape = bottom.get_shape().as_list()
        if n_hidden is not None:
            W = self.get_weights(shape=(bottom_shape[1], n_hidden), name=name)
            b = self.get_bias(shape=n_hidden, name=name)
        elif name in self.data_dict.keys():
            W = self.get_weights(name=name)
            b = self.get_bias(name=name)
        else:
            raise ValueError("dense_layer %r: give n_hidden or provide pre-trained values in data_dict" % name)
        self.para_dict[name] = [W, b]
        fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
        self.net_shape[name] = fc.get_shape().as_list()
        return fc

    def flatten_layer(self, bottom, name):
        """Reshape ``bottom`` to ``[-1, product of its non-batch dimensions]``."""
        shape = bottom.get_shape().as_list()
        dim = 1
        for d in shape[1:]:
            dim *= d
        flatten = tf.reshape(bottom, [-1, dim])
        self.net_shape[name] = flatten.get_shape().as_list()
        return flatten

    def avg_pool_layer(self, bottom, name):
        """2x2 average pooling with stride 2 and SAME padding."""
        pool = tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
        self.net_shape[name] = pool.get_shape().as_list()
        return pool

    def max_pool_layer(self, bottom, name):
        """2x2 max pooling with stride 2 and SAME padding."""
        pool = tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
        self.net_shape[name] = pool.get_shape().as_list()
        return pool

    def dropout(self, bottom, keep_prob):
        """Apply dropout when the ``is_train`` placeholder is fed True, identity otherwise."""
        # is_train is a placeholder, so the choice must be made inside the graph;
        # a Python-level `== True` comparison cannot see the fed value.
        return tf.cond(self.is_train,
                       lambda: tf.nn.dropout(bottom, keep_prob=keep_prob),
                       lambda: tf.identity(bottom))

    def _conv_params(self, shape, name, bias_axis):
        """Create (or fetch) filter, bias and batch-norm variables and register them in para_dict.

        :param shape: filter shape, or None to use pre-trained values from data_dict
        :param name: layer name
        :param bias_axis: index into ``shape`` that holds the number of output channels
        :raises ValueError: when ``shape`` is None and ``name`` has no pre-trained values
        """
        if shape is not None:
            conv_filter, gamma, beta, bn_mean, bn_variance = self.get_conv_filter(shape=shape, name=name)
            conv_bias = self.get_bias(shape=shape[bias_axis], name=name)
        elif name in self.data_dict.keys():
            conv_filter, gamma, beta, bn_mean, bn_variance = self.get_conv_filter(name=name)
            conv_bias = self.get_bias(name=name)
        else:
            raise ValueError("conv layer %r: give a filter shape or provide pre-trained values in data_dict" % name)

        self.para_dict[name] = [conv_filter, conv_bias]
        self.para_dict[name+"_gamma"] = gamma
        self.para_dict[name+"_beta"] = beta
        self.para_dict[name+"_bn_mean"] = bn_mean
        self.para_dict[name+"_bn_variance"] = bn_variance
        return conv_filter, conv_bias, gamma, beta, bn_mean, bn_variance

    def _batch_norm(self, conv, gamma, beta, bn_mean, bn_variance):
        """Batch-normalise ``conv`` over batch/height/width.

        When ``is_train`` is True the batch moments are used and the moving
        averages ``bn_mean``/``bn_variance`` are updated (decay 0.99);
        otherwise the stored moving averages are used.
        """
        from tensorflow.python.training.moving_averages import assign_moving_average

        def mean_var_with_update():
            mean, variance = tf.nn.moments(conv, [0,1,2], name='moments')
            # tie the moving-average updates to the returned tensors so they
            # run whenever the training branch is evaluated
            with tf.control_dependencies([assign_moving_average(bn_mean, mean, 0.99),
                                            assign_moving_average(bn_variance, variance, 0.99)]):
                return tf.identity(mean), tf.identity(variance)

        mean, variance = tf.cond(self.is_train, mean_var_with_update, lambda:(bn_mean, bn_variance))
        return tf.nn.batch_normalization(conv, mean, variance, beta, gamma, 1e-05)

    def _activate(self, tensor, activation):
        """Return tanh(tensor) for ``activation == 'tanh'``, leaky ReLU for any other value."""
        if activation=='tanh':
            return tf.nn.tanh(tensor)
        return tf.nn.leaky_relu(tensor)

    def trans_conv_layer(self, bottom, output_shape, stride, activation='relu', name=None, shape=None):
        """Transposed convolution + bias + batch norm + activation.

        :param bottom: input tensor
        :param output_shape: full output shape passed to ``tf.nn.conv2d_transpose``
        :param stride: spatial stride
        :param activation: 'tanh' for tanh; any other value (including the
            default 'relu') applies leaky ReLU
        :param name: layer name; must contain 'deconv' so ``get_conv_filter``
            reads the output channels from the third filter axis
        :param shape: filter shape (height, width, out_channels, in_channels)
        :raises ValueError: when ``shape`` is None and ``name`` has no pre-trained values
        """
        conv_filter, conv_bias, gamma, beta, bn_mean, bn_variance = self._conv_params(shape, name, bias_axis=2)

        conv = tf.nn.conv2d_transpose(bottom, conv_filter, output_shape, strides=[1, stride, stride, 1], padding="SAME")
        conv = tf.nn.bias_add(conv, conv_bias)
        conv = self._batch_norm(conv, gamma, beta, bn_mean, bn_variance)
        self.net_shape[name] = conv.get_shape().as_list()
        return self._activate(conv, activation)

    def conv_bn_layer(self, bottom, stride=1, activation='lrelu', name=None, shape=None):
        """Convolution + bias + batch norm + activation.

        :param bottom: input tensor
        :param stride: spatial stride
        :param activation: 'tanh' for tanh; any other value applies leaky ReLU
        :param name: layer name
        :param shape: filter shape (height, width, in_channels, out_channels)
        :raises ValueError: when ``shape`` is None and ``name`` has no pre-trained values
        """
        conv_filter, conv_bias, gamma, beta, bn_mean, bn_variance = self._conv_params(shape, name, bias_axis=3)

        conv = tf.nn.conv2d(bottom, conv_filter, [1, stride, stride, 1], padding='SAME')
        conv = tf.nn.bias_add(conv, conv_bias)
        conv = self._batch_norm(conv, gamma, beta, bn_mean, bn_variance)
        self.net_shape[name] = conv.get_shape().as_list()
        return self._activate(conv, activation)

    def get_conv_filter(self, shape=None, name=None, with_bn=True):
        """Create the filter variable ``name_W`` and, optionally, its batch-norm variables.

        :param shape: filter shape for a freshly initialised filter; if None the
            value stored in ``data_dict[name][0]`` is used
        :param name: layer name; names containing 'deconv' take the channel
            count for batch norm from filter axis 2, all others from axis 3
        :param with_bn: when True also return gamma, beta, bn_mean, bn_variance
        :return: ``(filter, gamma, beta, bn_mean, bn_variance)`` if ``with_bn``,
            else the filter alone; None when neither shape nor pre-trained values exist
        """
        if shape is not None:
            conv_filter = tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_W", dtype=tf.float32)
        elif name in self.data_dict.keys():
            conv_filter = tf.get_variable(initializer=self.data_dict[name][0], name=name+"_W")
        else:
            print("Neither give a shape nor lack a pre-trained layer called %s" % name)
            return None

        if not with_bn:
            return conv_filter

        filter_shape = conv_filter.get_shape().as_list()
        # transposed-conv filters are laid out (H, W, out, in); regular ones (H, W, in, out)
        n_out = filter_shape[2] if 'deconv' in name else filter_shape[3]

        def bn_variable(suffix, default_initializer):
            # prefer a stored value, otherwise create a fresh per-channel variable
            key = name + suffix
            if key in self.data_dict.keys():
                return tf.get_variable(initializer=self.data_dict[key], name=key)
            return tf.get_variable(shape=(n_out,), initializer=default_initializer, name=key)

        gamma = bn_variable("_gamma", tf.ones_initializer())
        beta = bn_variable("_beta", tf.zeros_initializer())
        bn_mean = bn_variable("_bn_mean", tf.zeros_initializer())
        bn_variance = bn_variable("_bn_variance", tf.ones_initializer())
        return conv_filter, gamma, beta, bn_mean, bn_variance

    def get_weights(self, shape=None, name=None):
        """Return the weight variable ``name_W``.

        Uses a truncated-normal initialiser when ``shape`` is given, otherwise
        ``data_dict[name][0]``; prints a message and returns None if neither exists.
        """
        if shape is not None:
            return tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_W", dtype=tf.float32)
        elif name in self.data_dict.keys():
            return tf.get_variable(initializer=self.data_dict[name][0], name=name+"_W")
        else:
            print("(get_weight) neither give a shape nor lack a pre-trained layer called %s" % name)
            return None

    def get_bias(self, shape=None, name=None):
        """Return the bias variable ``name_b``.

        Uses a truncated-normal initialiser when ``shape`` is given, otherwise
        ``data_dict[name][1]``; prints a message and returns None if neither exists.
        """
        if shape is not None:
            return tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_b", dtype=tf.float32)
        elif name in self.data_dict.keys():
            return tf.get_variable(initializer=self.data_dict[name][1], name=name+"_b")
        else:
            print("(get_bias) neither give a shape nor lack a pre-trained layer called %s" % name)
            return None


In [3]:
# %load train.py
import os
import time
import argparse
import numpy as np
import tensorflow as tf

from progress.bar import Bar
from ipywidgets import IntProgress
from IPython.display import display
import skimage.transform
import imageio

from model import VAE
from utils import read_dataset

# Dataset locations: one CSV file plus one image folder per split.
TRAIN_CSV, TRAIN_DIR = "hw4_data/train.csv", "hw4_data/train/"
TEST_CSV, TEST_DIR = "hw4_data/test.csv", "hw4_data/test/"

# Run configuration.
FLAG_save_dir = 'save/'      # checkpoints are written below this folder
FLAG_batch_size = 64
FLAG_n_dim = 512             # latent dimension handed to VAE.build
FLAG_lr = 1e-4               # starting learning rate
FLAG_lambda_KL = 1e-5        # KL weight handed to VAE.build

print("Reading dataset...")
# read_dataset returns a pair per split; the first item is indexed like an
# array of images below (presumably the second is the CSV as a DataFrame - confirm in utils).
Xtrain, df_train = read_dataset(TRAIN_CSV, TRAIN_DIR)
Xtest, df_test = read_dataset(TEST_CSV, TEST_DIR)

# Build the graph for the per-image shape of the training data.
vae = VAE()
vae.build(n_dim=FLAG_n_dim, lambda_KL=FLAG_lambda_KL, shape=Xtrain.shape[1:])

# The saver is created right after build(), so it tracks the variables that exist at this point.
checkpoint_path = os.path.join(FLAG_save_dir, 'model.ckpt')
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

def initialize_uninitialized(sess):
    """Initialise, in ``sess``, only the global variables that are not initialised yet.

    Lets variables created after the first ``global_variables_initializer`` run
    be initialised without resetting the ones that already hold values.
    """
    all_variables = tf.global_variables()
    # one boolean per variable, fetched in a single session call
    ready_flags = sess.run([tf.is_variable_initialized(variable) for variable in all_variables])

    pending = []
    for variable, ready in zip(all_variables, ready_flags):
        if not ready:
            pending.append(variable)

    if not pending:
        return
    sess.run(tf.variables_initializer(pending))

def res_plot(samples, n_row, n_col):
    """Draw ``samples`` on an ``n_row`` x ``n_col`` grid of axis-free panels.

    :param samples: iterable of image arrays; a sample that is not already
        3-dimensional is reshaped to (64, 64, 3)
    :param n_row: number of grid rows
    :param n_col: number of grid columns
    :return: the created matplotlib figure (the caller saves and closes it)
    """
    fig = plt.figure(figsize=(n_col*2, n_row*2))
    # `gridspec` is never imported in this notebook; pyplot exposes the same class.
    gs = plt.GridSpec(n_row, n_col)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        # keep ready-made (H, W, C) images as they are; only flat vectors need reshaping
        image = sample if np.ndim(sample) == 3 else sample.reshape(64, 64, 3)
        plt.imshow(image)
    return fig

# Training driver: optimises vae.vae_loss with early stopping on the validation
# loss, saves a checkpoint whenever the validation loss improves, and
# periodically writes reconstruction / random-sample image grids.
# Relies on names defined in earlier cells: vae, saver, checkpoint_path,
# Xtrain, Xtest, FLAG_*, res_plot, initialize_uninitialized and plt.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # hyper parameters
    batch_size = FLAG_batch_size
    epoch = 1
    early_stop_patience = 50
    min_delta = 0.0001
    opt_type = 'adam'

    # only full batches are used; a trailing partial batch is dropped
    n_train_batches = Xtrain.shape[0] // batch_size
    n_val_batches = Xtest.shape[0] // batch_size

    # make sure every output folder exists before anything is written to it
    recon_dir = os.path.join(FLAG_save_dir, 'recons')
    sample_dir = os.path.join(FLAG_save_dir, 'samples')
    for out_dir in (FLAG_save_dir, recon_dir, sample_dir):
        os.makedirs(out_dir, exist_ok=True)

    # recorder
    epoch_counter = 0

    # optimizer
    global_step = tf.Variable(0, trainable=False)

    # Passing global_step to minimize() will increment it at each step.
    # The learning rate is halved every `half_cycle` steps for both optimizers.
    start_learning_rate = FLAG_lr
    half_cycle = 2000
    learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, half_cycle, 0.5, staircase=True)
    if opt_type == 'sgd':
        opt = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9, use_nesterov=True)
    else:
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

    obj = vae.vae_loss
    train_op = opt.minimize(obj, global_step=global_step)

    # progress bar
    ptrain = IntProgress()
    pval = IntProgress()
    display(ptrain)
    display(pval)
    ptrain.max = n_train_batches
    pval.max = n_val_batches

    # re-initialize: global_step and the optimizer slots were created after
    # the global initializer above ran
    initialize_uninitialized(sess)

    # early-stopping state
    patience_counter = 0
    current_best_val_loss = float('inf')

    while patience_counter < early_stop_patience and epoch_counter < epoch:

        # start training
        stime = time.time()

        train_loss = 0.0
        train_reconstruction_loss = 0.0
        train_kl_loss = 0.0
        for i in range(n_train_batches):
            st = i*batch_size
            ed = (i+1)*batch_size
            loss, reconstruction_loss, kl_loss, _ = sess.run([vae.vae_loss, vae.recon_loss, vae.kl_loss, train_op],
                                feed_dict={vae.x: Xtrain[st:ed,:],
                                            vae.is_train: True})
            # np.mean reduces per-example loss vectors to one number per batch
            # (and leaves scalar losses unchanged)
            train_loss += float(np.mean(loss))
            train_reconstruction_loss += float(np.mean(reconstruction_loss))
            train_kl_loss += float(np.mean(kl_loss))
            ptrain.value += 1
            ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)

        # max(..., 1) avoids a ZeroDivisionError when the split is smaller than one batch
        train_loss = train_loss/max(n_train_batches, 1)
        train_reconstruction_loss = train_reconstruction_loss/max(n_train_batches, 1)
        train_kl_loss = train_kl_loss/max(n_train_batches, 1)

        # validation
        val_loss = 0.0
        val_reconstruction_loss = 0.0
        val_kl_loss = 0.0
        for i in range(n_val_batches):
            st = i*batch_size
            ed = (i+1)*batch_size
            loss, reconstruction_loss, kl_loss = sess.run([vae.vae_loss, vae.recon_loss, vae.kl_loss],
                                feed_dict={vae.x: Xtest[st:ed,:],
                                            vae.is_train: False})
            val_loss += float(np.mean(loss))
            val_reconstruction_loss += float(np.mean(reconstruction_loss))
            val_kl_loss += float(np.mean(kl_loss))
            pval.value += 1
            pval.description = "Testing %s/%s" % (pval.value, pval.max)
        val_loss = val_loss/max(n_val_batches, 1)
        val_reconstruction_loss = val_reconstruction_loss/max(n_val_batches, 1)
        val_kl_loss = val_kl_loss/max(n_val_batches, 1)

        # plot
        if epoch_counter%10 == 0:
            # file names carry the 1-based number of the epoch that just ran,
            # so successive plots do not overwrite each other
            plot_name = '{}.png'.format(str(epoch_counter + 1).zfill(3))

            # top row: first 10 test images, bottom row: their reconstructions
            Xplot = sess.run(vae.output,
                    feed_dict={vae.x: Xtest[:10,:],
                                vae.is_train: False})
            fig = res_plot(np.concatenate((Xtest[:10,:], Xplot), axis=0), 2, 10)
            fig.savefig(os.path.join(recon_dir, plot_name), bbox_inches='tight')
            plt.close(fig)

            #### produce 32 random images
            samples = sess.run(vae.random_sample_images, feed_dict={vae.random_sample: np.random.randn(32, vae.n_dim),
                                                                    vae.is_train: False})
            fig = res_plot(samples, 4, 8)
            fig.savefig(os.path.join(sample_dir, plot_name), bbox_inches='tight')
            plt.close(fig)

        # early stopping check: only an improvement larger than min_delta counts
        if (current_best_val_loss - val_loss) > min_delta:
            current_best_val_loss = val_loss
            patience_counter = 0
            saver.save(sess, checkpoint_path, global_step=epoch_counter)
            print("save in %s" % checkpoint_path)
        else:
            patience_counter += 1

        # shuffle Xtrain for the next epoch
        idx = np.random.permutation(Xtrain.shape[0])
        Xtrain= Xtrain[idx,:,:,:]

        # epoch end
        epoch_counter += 1

        ptrain.value = 0
        pval.value = 0

        print("Epoch %s (%s), %s sec >> train loss: %.4f, train recon loss: %.4f, train kl loss: %.4f, val loss: %.4f, val recon loss: %.4f, val kl loss: %.4f" % (epoch_counter, patience_counter, round(time.time()-stime,2), train_loss, train_reconstruction_loss, train_kl_loss, val_loss, val_reconstruction_loss ,val_kl_loss))

    # para_dict = sess.run(vgg16.para_dict)
    # np.save(os.path.join(FLAG_save_dir, "para_dict.npy"), para_dict)
    # print("save in %s" % os.path.join(FLAG_save_dir, "para_dict.npy"))

#     FLAG_optimizer = opt_type
#     FLAG_lr = start_learning_rate
#     FLAG_batch_size = batch_size
#     FLAG_epoch_end = epoch_counter
#     FLAG_val_loss = current_best_val_loss

#     header = ''
#     row = ''
#     for key in sorted(vars(FLAG)):
#         if header is '':
#             header = key
#             row = str(getattr(FLAG, key))
#         else:
#             header += ","+key
#             row += ","+str(getattr(FLAG,key))
#     row += "\n"
#     if os.path.exists("/home/cmchang/DLCV2018SPRING/hw4/model.csv"):
#         with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv", "a") as myfile:
#             myfile.write(row)
#     else:
#         with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv", "w") as myfile:
#             myfile.write(header)
#             myfile.write(row)


Reading dataset...
relu
relu
relu
relu
relu
relu
relu
tanh


save in save/model.ckpt
Epoch 1 (0), 1046.61 sec >> train loss: 18059.1152, train recon loss: 18053.7104, train kl loss: 540486.6119, val loss: 17611.5459, val recon loss: 17611.5356, val kl loss: 1023.7820


NameError: name 'FLAG' is not defined