<a href="https://colab.research.google.com/github/wynnliam/texture_generator/blob/honeycomb/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Liam Wynn, 2/11/2019, Variational Autoendcoder Texture

In [0]:
import tensorflow as tf
import numpy as np
import PIL.Image
import os

from google.colab import files
from google.colab import drive

In [0]:
# This is the standard way to load in data. If for some reason
# drive.mount doesn't work, use the other method.
texture_type = 'honeycombed'
model_path = '/content/drive/My Drive/VAE_Textures/' + texture_type + '/'
data_path = model_path + '/data/'

drive.mount('/content/drive')

def normalize_datum(datum_as_np_array):
  return datum_as_np_array / 255.0

def load_example(image_path):
  raw_datum = PIL.Image.open(image_path)
  datum_as_np_array = np.array(raw_datum, dtype = np.float32)
  return normalize_datum(datum_as_np_array)
  
# TODO: Add test/validation data
def load_dataset():
  training_data_path = data_path + '/training_data/'
  image_names = os.listdir(training_data_path)
  result = []
  
  for image_name in image_names:
    result.append(load_example(training_data_path + image_name))
    
  return np.array(result)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# TODO: Add method to load data from file here

In [0]:
dataset = load_dataset()

for image in dataset:
  print(np.shape(image))

(305, 379, 3)
(640, 640, 3)
(120, 160, 3)
(427, 640, 3)
(322, 215, 3)
(283, 314, 3)
(160, 221, 3)
(318, 373, 3)


In [0]:
def get_training_sample_from_example(example):
  example_shape = np.shape(example)
  
  num_rows = example_shape[0] - EXAMPLE_SIZE
  num_cols = example_shape[1] - EXAMPLE_SIZE
  
  start_row = np.random.randint(0, num_rows)
  start_col = np.random.randint(0, num_cols)
  
  result = example[np.ix_(range(start_row, start_row + EXAMPLE_SIZE),
                          range(start_col, start_col + EXAMPLE_SIZE),
                          [0])
                  ]
  
  return result

def load_batch_from_dataset(dataset, batch_size=64):
  data_shape = np.shape(dataset)
  num_examples = data_shape[0]
  result = []
  
  for i in range(batch_size):
    example_index = np.random.randint(0, num_examples)
    result.append(get_training_sample_from_example(dataset[example_index]))
  
  return np.array(result)


In [0]:
BATCH_SIZE = 128
# In pixels
EXAMPLE_SIZE = 64

NUM_LATENT_UNITS = 8

In [0]:
def encoder(X_in, keep_prob):
  with tf.variable_scope("encoder", reuse=None):
    l = tf.layers.conv2d(X_in, filters=64, kernel_size=4, strides=2, padding='same', activation=tf.nn.leaky_relu)
    l = tf.nn.dropout(l, keep_prob)
    l = tf.layers.conv2d(l, filters=64, kernel_size=4, strides=2, padding='same', activation=tf.nn.leaky_relu)
    l = tf.nn.dropout(l, keep_prob)
    l = tf.layers.conv2d(l, filters=64, kernel_size=4, strides=1, padding='same', activation=tf.nn.leaky_relu)
    l = tf.nn.dropout(l, keep_prob)
    l = tf.layers.flatten(l)
    
    # Not entirely sure how this computes mean/sd. The mean part I kind of get,
    # but the sd seems to be half of whatever the mn is?
    mn = tf.layers.dense(l, units=NUM_LATENT_UNITS)
    # I think this has to do with how z is computer. Typically,
    # z = mn + sd^(1/2) * epsilon
    #sd = 0.5 * tf.layers.dense(l, units=NUM_LATENT_UNITS)
    sd = tf.layers.dense(l, units=NUM_LATENT_UNITS)
    # epsilon is randomly sampled from a normal distribution. By doing this,
    # we can perform back-prop on mn and sd, while still randomly sampling.
    epsilon = tf.random_normal(tf.stack([tf.shape(l)[0], NUM_LATENT_UNITS]))
    # Reparameterized: z = mean + epsilon * e^sd.
    z  = mn + tf.multiply(epsilon, tf.exp(sd))
        
    return z, mn, sd

In [0]:
def decoder(sampled_z, keep_prob):
  with tf.variable_scope("decoder", reuse=None):
    l = tf.layers.dense(sampled_z, units=4 * 4 * 1024, activation=tf.nn.leaky_relu)
    l = tf.reshape(l, [-1, 4, 4, 1024])
    
    l = tf.layers.conv2d_transpose(l, filters=512, kernel_size=5, strides=2, padding='same', activation=tf.nn.relu)
    l = tf.nn.dropout(l, keep_prob)
    
    l = tf.layers.conv2d_transpose(l, filters=256, kernel_size=5, strides=2, padding='same', activation=tf.nn.leaky_relu)
    l = tf.nn.dropout(l, keep_prob)
    
    l = tf.layers.conv2d_transpose(l, filters=128, kernel_size=5, strides=2, padding='same', activation=tf.nn.leaky_relu)
    l = tf.nn.dropout(l, keep_prob)
    
    l = tf.layers.conv2d_transpose(l, filters=1, kernel_size=5, strides=2, padding='same', activation=tf.nn.sigmoid)
    
    return l

In [0]:
tf.reset_default_graph()

X_in = tf.placeholder(dtype=tf.float32, shape=[None, EXAMPLE_SIZE, EXAMPLE_SIZE, 1], name='X')
Y = tf.placeholder(dtype=tf.float32, shape=[None, EXAMPLE_SIZE, EXAMPLE_SIZE, 1], name='Y')
Y_flat = tf.reshape(Y, shape=[-1, EXAMPLE_SIZE * EXAMPLE_SIZE])
keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='keep_prob')

sampled, mn, sd = encoder(X_in, keep_prob)
dec  = decoder(sampled, keep_prob)

print(sampled)
print(dec)

unreshaped = tf.reshape(dec, [-1, EXAMPLE_SIZE * EXAMPLE_SIZE])
# Reconstruction loss. Y_flat is a flattened version of our Y (which is the
# same as X_in) that we use to compare against a flattened version of the
# decoder's output. We can use squared difference because P(X_in | dec)
# is an isotropic gaussian (no idea what that means, but this is a property of
# isotropic gaussians apparently).
img_loss = tf.reduce_sum(tf.squared_difference(unreshaped, Y_flat), 1)
# I think this is like a closed-form version of KL divergence
# between two multivariate gaussian distributions. Basically we want to
# encourage our latent distribution to be as close to a normal distribution
# as possible.
latent_loss = -0.5 * tf.reduce_sum(1.0 + 2.0 * sd - tf.square(mn) - tf.exp(2.0 * sd), 1)
# And, of course, the overall loss is the sum of the image loss and latent loss.
loss = tf.reduce_mean(img_loss + latent_loss)
optimizer = tf.train.AdamOptimizer(0.0005).minimize(loss)


Tensor("encoder/add:0", shape=(?, 8), dtype=float32)
Tensor("decoder/conv2d_transpose_3/Sigmoid:0", shape=(?, 64, 64, 1), dtype=float32)


In [0]:
# Set up GPU running capabilities
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

Found GPU at: /device:GPU:0


In [0]:
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

for i in range(3001):
  with tf.device('/gpu:0'): 
    batch = load_batch_from_dataset(dataset)
    sess.run(optimizer, feed_dict = {X_in : batch, Y: batch, keep_prob: 0.8})

  if i % 10 == 0:
    print('epoch: ', i)
    ls, d, i_ls, d_ls, mu, sigm = sess.run([loss, dec, img_loss, latent_loss, mn, sd], feed_dict={X_in: batch, Y: batch, keep_prob: 1.0})
    print('loss: ', ls)
    print('')
  
print('Done!')

epoch:  0
loss:  284.83752

epoch:  10
loss:  281.35553

epoch:  20
loss:  264.523

epoch:  30
loss:  269.99805

epoch:  40
loss:  234.58981

epoch:  50
loss:  206.36395

epoch:  60
loss:  153.51457

epoch:  70
loss:  150.36302

epoch:  80
loss:  172.37888

epoch:  90
loss:  144.57495

epoch:  100
loss:  158.396

epoch:  110
loss:  131.33516

epoch:  120
loss:  135.1007

epoch:  130
loss:  141.48257

epoch:  140
loss:  119.19731

epoch:  150
loss:  158.30426

epoch:  160
loss:  144.48721

epoch:  170
loss:  177.26175

epoch:  180
loss:  137.02763

epoch:  190
loss:  136.978

epoch:  200
loss:  144.00241

epoch:  210
loss:  117.22205

epoch:  220
loss:  173.21045

epoch:  230
loss:  124.19166

epoch:  240
loss:  139.14236

epoch:  250
loss:  116.81865

epoch:  260
loss:  153.12805

epoch:  270
loss:  118.401146

epoch:  280
loss:  128.6437

epoch:  290
loss:  144.58607

epoch:  300
loss:  178.53299

epoch:  310
loss:  131.54938

epoch:  320
loss:  140.82745

epoch:  330
loss:  119.24196

In [0]:
# Generate and save some examples
output_path = model_path + 'outputs/generated/'

print(output_path)

num_generated_images = 10
randoms = []
for i in range(num_generated_images):
  randoms.append(np.random.normal(0, 1, NUM_LATENT_UNITS))
  
randoms = np.array(randoms)
with tf.device('/gpu:0'): 
  images = sess.run(dec, feed_dict = {sampled: randoms, keep_prob: 1.0})

for j in range(num_generated_images):
  result = images[j]
  generated_image = np.repeat(result, 3, axis=2)
  generated_image = np.array(generated_image * 255.0, dtype=np.uint8)
  
  save_image = PIL.Image.fromarray(generated_image)
  save_image.save(output_path + str(j) + '.bmp')

/content/drive/My Drive/VAE_Textures/honeycombed/outputs/generated/
