<a href="https://colab.research.google.com/github/BitLorax/depth-prediction/blob/master/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import os
import random
import cv2
import datetime
from mpl_toolkits.axes_grid1 import make_axes_locatable

%load_ext tensorboard

# SSH

In [None]:
# Start from an empty /root/.ssh directory.
!rm -rf /root/.ssh
!mkdir /root/.ssh

In [None]:
# Extract ssh.tar.gz from the working directory (it must already be there).
# Presumably it unpacks into ssh-colab/, which the next cell copies from.
!tar xvzf ssh.tar.gz

In [None]:
# Copy the extracted files into /root/.ssh, then delete the extracted folder and the archive.
!cp ssh-colab/* /root/.ssh && rm -rf ssh-colab && rm -rf ssh.tar.gz
!chmod 700 /root/.ssh

In [None]:
# Append github.com's host keys to known_hosts (used by the git-over-SSH cells below).
!touch /root/.ssh/known_hosts
!ssh-keyscan github.com >> /root/.ssh/known_hosts
!chmod 644 /root/.ssh/known_hosts

# Download Data

In [None]:
# Global git identity.
!git config --global user.email 'willjhliang@gmail.com'
!git config --global user.name 'BitLorax'

In [None]:
# Remove any repository metadata left in the working directory
# (prints an error, but is otherwise harmless, when there is none).
!rm -r .git

In [None]:
# Create a fresh repository and fetch the remote over SSH.
!git init
!git remote add -f origin git@github.com:BitLorax/depth-prediction.git

# Sparse checkout: only the 'imgs' and 'deps' paths are listed for checkout.
!git config core.sparseCheckout true
!echo 'imgs' >> .git/info/sparse-checkout
!echo 'deps' >> .git/info/sparse-checkout

In [None]:
# Pull master; with the sparse-checkout list above only imgs/ and deps/ are checked out.
!git pull origin master

# Data Generator

In [None]:
# Total number of samples and the fraction assigned to each split.
M = 1449
TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT = .9, .05, .05

# Cumulative sample counts: IDs [0, m) are training, [m, mv) validation;
# mt is mv plus the truncated test share.
m = int(M * TRAIN_SPLIT)
mv = m + int(M * VAL_SPLIT)
mt = mv + int(M * TEST_SPLIT)

BATCH_SIZE = 32

# Crop size fed to the network and the border (in pixels) stripped from every raw array.
width = height = 128
border = 8

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
  """Keras Sequence yielding randomly augmented (image, depth) batches.

  Every sample is read from 'imgs/img<ID>.npy' and 'deps/dep<ID>.npy', stripped
  of its `border`-pixel frame (module-level constant), randomly downscaled,
  randomly cropped to `dim` and randomly flipped along axis 0.

  Args:
    listIDs: array of sample IDs; shuffled in place at the end of every epoch.
    batchSize: number of samples per batch.
    dim: (size along axis 0, size along axis 1) of the returned crops.
    nChannels: number of image channels.
  """

  def __init__(self, listIDs, batchSize, dim, nChannels):
    # Let the Keras base class set up its own state.
    super().__init__()
    self.list_IDs = listIDs
    self.batch_size = batchSize
    self.dim = dim
    self.n_channels = nChannels


  def on_epoch_end(self):
    """Reshuffle the sample IDs in place."""
    np.random.shuffle(self.list_IDs)


  def __len__(self):
    """Number of full batches; a trailing partial batch is dropped."""
    return int(np.floor(len(self.list_IDs) / self.batch_size))


  def __getitem__(self, idx):
    """Build batch number `idx`.

    Returns:
      (X, y) with shapes (batch_size, *dim, n_channels) and (batch_size, *dim).

    Raises:
      ValueError: from random.randrange when a rescaled image is not larger
        than the crop along either axis.
    """
    curIDs = self.list_IDs[idx*self.batch_size:(idx + 1)*self.batch_size]

    # The crop size comes from this generator's own `dim`, not from the
    # notebook-level width/height globals, so crops always fit X and y.
    cropW, cropH = self.dim

    X = np.empty((self.batch_size, *self.dim, self.n_channels))
    y = np.empty((self.batch_size, *self.dim))

    for i, ID in enumerate(curIDs):
      img = np.load('imgs/img' + str(ID) + '.npy')
      dep = np.load('deps/dep' + str(ID) + '.npy')

      # take out white border
      img = img[border:img.shape[0] - border, border:img.shape[1] - border]
      dep = dep[border:dep.shape[0] - border, border:dep.shape[1] - border]

      # downsize; image and depth map share the same random scale
      scale = random.choice([.5, .55, .6])
      img = cv2.resize(img, (0, 0), fx=scale, fy=scale)
      dep = cv2.resize(dep, (0, 0), fx=scale, fy=scale)

      # random crop of size `dim`; offsets are drawn in steps of 100 pixels
      kx = random.randrange(0, img.shape[0] - cropW, 100)
      ky = random.randrange(0, img.shape[1] - cropH, 100)
      X[i,] = img[kx:kx + cropW, ky:ky + cropH, ...]
      y[i,] = dep[kx:kx + cropW, ky:ky + cropH]

      # flip along axis 0 half of the time (the original comment calls this a
      # horizontal flip; presumably the arrays are stored width-first - TODO confirm)
      if random.random() > .5:
        X[i,] = np.flip(X[i,], 0)
        y[i,] = np.flip(y[i,], 0)

    return X, y

In [None]:
# How many times every sample ID is repeated within one pass of each generator.
trainMult = 1
valMult = 8
testMult = 64

# ID ranges: [0, m) train, [m, mv) validation, [mv, M) test.
trainGen = DataGenerator(listIDs=np.repeat(np.arange(m), trainMult),
                         batchSize=BATCH_SIZE, dim=(128, 128), nChannels=3)
valGen = DataGenerator(listIDs=np.repeat(np.arange(m, mv), valMult),
                       batchSize=BATCH_SIZE, dim=(128, 128), nChannels=3)
longValGen = DataGenerator(listIDs=np.repeat(np.arange(m, mv), valMult * valMult),
                           batchSize=BATCH_SIZE, dim=(128, 128), nChannels=3)
testGen = DataGenerator(listIDs=np.repeat(np.arange(mv, M), testMult),
                        batchSize=BATCH_SIZE, dim=(128, 128), nChannels=3)

# From here on m / mv / M count repeated samples rather than raw IDs.
# NOTE: this rebinds the names the generators above were built from, so the
# cell must not be re-run without first re-running the constants cell.
m *= trainMult
mv = m + int(M * VAL_SPLIT) * valMult
M = mv + int(M * TEST_SPLIT) * testMult

## Load Data (Backup)
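Use these cells only if the Data Generator above breaks. Note that they overwrite the globals `width`, `height`, `m` and `BATCH_SIZE`.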

In [None]:
# Peek at the first archive to learn the per-sample image size
# (axes 1 and 2 of its 'images' array).
temp = np.load('nyuDepth/data0.npz')['images']
width, height = temp.shape[1:3]

# Empty accumulators that the loading loop appends to along axis 0.
images = np.empty((0, width, height, 3))
depths = np.empty((0, width, height))

In [None]:
LOAD_START = 0
LOAD_END = 108

# Collect every archive's arrays first and concatenate once at the end:
# concatenating inside the loop re-copies everything loaded so far on each
# iteration. Starting the lists with the current `images` / `depths` keeps the
# original append-to-what-is-already-loaded behaviour.
imgChunks, depChunks = [images], [depths]
for i in range(LOAD_START, LOAD_END, 1):
  # The context manager closes the .npz file once its arrays have been read.
  with np.load('nyuDepth/data' + str(i) + '.npz') as data:
    imgChunks.append(data['images'])
    depChunks.append(data['depths'])

images = np.concatenate(imgChunks, axis=0)
depths = np.concatenate(depChunks, axis=0)

# Release the per-archive copies; only the concatenated arrays are needed.
del imgChunks, depChunks

In [None]:
# The first TRAINING_SPLIT share of the loaded samples is used for training,
# the rest for testing. Note that this rebinds the global `m`.
TRAINING_SPLIT = .9
m = len(images)
idx = int(TRAINING_SPLIT * m)

imgTrain, imgTest = images[:idx], images[idx:]
depTrain, depTest = depths[:idx], depths[idx:]

In [None]:
def shuffle(images, depths):
  """Reorder `images` and `depths` with one shared random permutation.

  Both arrays are indexed along axis 0 with the same permutation of
  range(images.shape[0]), so image/depth pairs stay aligned. The inputs are
  not modified; the reordered arrays are returned as (images, depths).
  """
  order = np.arange(images.shape[0])
  np.random.shuffle(order)
  return images[order], depths[order]

In [None]:
# Shuffle each split on its own; images and depths are permuted together.
imgTrain, depTrain = shuffle(imgTrain, depTrain)
imgTest, depTest = shuffle(imgTest, depTest)

In [None]:
# Shapes of the full arrays, the training split and the test split,
# one per line with a blank line between the groups.
print(images.shape, depths.shape, '',
      imgTrain.shape, depTrain.shape, '',
      imgTest.shape, depTest.shape,
      sep='\n')

In [None]:
# Training pairs repeat indefinitely and are prefetched; test pairs are a single pass.
train = (tf.data.Dataset.from_tensor_slices((imgTrain, depTrain))
         .repeat()
         .prefetch(buffer_size=tf.data.experimental.AUTOTUNE))
test = tf.data.Dataset.from_tensor_slices((imgTest, depTest))

In [None]:
# Note: this rebinds the global BATCH_SIZE that the generator cells set to 32.
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

# Shuffle through a fixed-size buffer, then batch; the test set is only batched.
train = train.shuffle(SHUFFLE_BUFFER_SIZE)
train = train.batch(BATCH_SIZE)
test = test.batch(BATCH_SIZE)

# Build Model

In [None]:
# Encoder ("down stack"): a VGG16-shaped convolutional stack with one
# BatchNormalization layer inserted before the last conv of every block.
# Conv and pool layers reuse VGG16's layer names so that the ImageNet weights
# can be copied over by name at the end of this cell.
vgg16 = tf.keras.applications.VGG16(input_shape=(width, height, 3),
                                    include_top=False,
                                    weights='imagenet')

xIn = tf.keras.Input(shape=(width, height, 3))

# In every block the last conv layer (outN) is created first and applied after
# the batch norm; its output is exposed below as a skip connection.

# block 1    128 x 128 x 3 -> 64 x 64 x 64
x = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', name='block1_conv1')(xIn)
out1 = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', name='block1_conv2')
x = tf.keras.layers.BatchNormalization()(x)
x = out1(x)
x = tf.keras.layers.MaxPool2D(name='block1_pool')(x)

# block 2    64 x 64 x 64 -> 32 x 32 x 128
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu', name='block2_conv1')(x)
out2 = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu', name='block2_conv2')
x = tf.keras.layers.BatchNormalization()(x)
x = out2(x)
x = tf.keras.layers.MaxPool2D(name='block2_pool')(x)

# block 3    32 x 32 x 128 -> 16 x 16 x 256
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu', name='block3_conv1')(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu', name='block3_conv2')(x)
out3 = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu', name='block3_conv3')
x = tf.keras.layers.BatchNormalization()(x)
x = out3(x)
x = tf.keras.layers.MaxPool2D(name='block3_pool')(x)

# block 4    16 x 16 x 256 -> 8 x 8 x 512
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block4_conv1')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block4_conv2')(x)
out4 = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block4_conv3')
x = tf.keras.layers.BatchNormalization()(x)
x = out4(x)
x = tf.keras.layers.MaxPool2D(name='block4_pool')(x)

# block 5    8 x 8 x 512 -> 4 x 4 x 512
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block5_conv1')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block5_conv2')(x)
out5 = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu', name='block5_conv3')
x = tf.keras.layers.BatchNormalization()(x)
x = out5(x)
x = tf.keras.layers.MaxPool2D(name='block5_pool')(x)

# Outputs: the five pre-pooling feature maps (skip connections) followed by
# the final pooled feature map.
downStack = tf.keras.Model(inputs=xIn, outputs=[out1.output, out2.output,
                                                out3.output, out4.output,
                                                out5.output, x])

# load pretrained weights
layerNames = [layer.name for layer in downStack.layers]
vggLayerNames = [layer.name for layer in vgg16.layers]
# drop the first VGG16 layer (presumably its input layer, which has no
# same-named counterpart in downStack - TODO confirm)
vggLayerNames.pop(0)

# Copy weights layer by layer, matching on name; i + 1 re-aligns the index
# with vgg16.layers after the pop above.
for i, name in enumerate(vggLayerNames):
  downStack.layers[layerNames.index(name)].set_weights(vgg16.layers[i + 1].get_weights())

In [None]:
# Decoder ("up stack") on top of downStack. Each block upsamples by 2 with a
# strided transposed convolution, refines with a conv + batch norm,
# concatenates the matching encoder skip connection and applies five more
# convolutions. Spatial sizes quoted below assume a 128 x 128 input, as in the
# encoder's block comments.
inputs = tf.keras.layers.Input(shape=(width, height, 3))
x = inputs

# downStack returns [skip1, ..., skip5, bottleneck]: keep the bottleneck as x
# and the five skip tensors (shallowest first) in `skips`.
skips = downStack(x)
x = skips[-1]
skips = skips[:-1]

# block 1    4 x 4 -> 8 x 8, joined with the block-5 skip
x = tf.keras.layers.Conv2DTranspose(512, 3, strides=2, padding='same', activation='relu',
                                    kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 0.02),
                                    use_bias=False)(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Concatenate()([x, skips[-1]])
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)

# block 2    8 x 8 -> 16 x 16, joined with the block-4 skip
x = tf.keras.layers.Conv2DTranspose(512, 3, strides=2, padding='same', activation='relu',
                                    kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 0.02),
                                    use_bias=False)(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Concatenate()([x, skips[-2]])
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(1024, 3, padding='same', activation='relu')(x)

# block 3    16 x 16 -> 32 x 32, joined with the block-3 skip
x = tf.keras.layers.Conv2DTranspose(256, 3, strides=2, padding='same', activation='relu',
                                    kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 0.02),
                                    use_bias=False)(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Concatenate()([x, skips[-3]])
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu')(x)

# block 4    32 x 32 -> 64 x 64, joined with the block-2 skip
x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu',
                                    kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 0.02),
                                    use_bias=False)(x)
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Concatenate()([x, skips[-4]])
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)

# block 5    64 x 64 -> 128 x 128, joined with the block-1 skip
x = tf.keras.layers.Conv2DTranspose(64, 3, strides=2, padding='same', activation='relu',
                                    kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 0.02),
                                    use_bias=False)(x)
x = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Concatenate()([x, skips[-5]])
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu')(x)

# Single-channel output with no activation: one unbounded value per pixel.
last = tf.keras.layers.Conv2D(1, 3, padding='same')
x = last(x)

model = tf.keras.Model(inputs=inputs, outputs=x)

In [None]:
class RelError(tf.keras.metrics.Metric):
  """Mean absolute relative error: mean(|y_true - y_pred| / y_true).

  The mean runs over every element passed to update_state since the last
  reset. Elements with y_true == 0 contribute 0 (divide_no_nan).
  `sample_weight` is accepted for API compatibility but ignored.
  """
  def __init__(self, name='rel', **kwargs):
    super(RelError, self).__init__(name=name, **kwargs)
    self.rel_sum = self.add_weight(name='rs', initializer='zeros')
    self.count = self.add_weight(name='ct', initializer='zeros')

  def update_state(self, y_true, y_pred, sample_weight=None):
    y_pred = tf.cast(tf.reshape(y_pred, shape=(-1, 1)), 'float32')
    y_true = tf.cast(tf.reshape(y_true, shape=(-1, 1)), 'float32')
    rel = tf.math.divide_no_nan(tf.math.abs(tf.math.subtract(y_true, y_pred)), y_true)
    # Normalise by the number of elements actually seen instead of the
    # notebook-level BATCH_SIZE / width / height globals, so the result stays
    # correct for any batch or crop size.
    self.rel_sum.assign_add(tf.reduce_sum(rel))
    self.count.assign_add(tf.cast(tf.size(rel), 'float32'))

  def result(self):
    return tf.math.divide_no_nan(self.rel_sum, self.count)

  def reset_state(self):
    self.rel_sum.assign(0.)
    self.count.assign(0.)

  def reset_states(self):
    # Older spelling of reset_state(), kept for callers that still use it.
    self.reset_state()


In [None]:
class LogError(tf.keras.metrics.Metric):
    """Mean absolute log10 error: mean(|log10(y_pred) - log10(y_true)|).

    The mean runs over every element passed to update_state since the last
    reset. `sample_weight` is accepted for API compatibility but ignored.
    NOTE: non-positive depths or predictions give NaN / infinite logarithms,
    which propagate into the result.
    """
    def __init__(self, name='log', **kwargs):
      super(LogError, self).__init__(name=name, **kwargs)
      self.log_sum = self.add_weight(name='ls', initializer='zeros')
      self.count = self.add_weight(name='ct', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
      y_pred = tf.cast(tf.reshape(y_pred, shape=(-1, 1)), 'float32')
      y_true = tf.cast(tf.reshape(y_true, shape=(-1, 1)), 'float32')
      # log10(v) computed as ln(v) / ln(10)
      y_pred = tf.math.divide_no_nan(tf.math.log(y_pred), tf.math.log(tf.constant(10, dtype=y_pred.dtype)))
      y_true = tf.math.divide_no_nan(tf.math.log(y_true), tf.math.log(tf.constant(10, dtype=y_true.dtype)))
      diff = tf.math.abs(tf.math.subtract(y_pred, y_true))
      # Normalise by the number of elements actually seen instead of the
      # notebook-level BATCH_SIZE / width / height globals, so the result
      # stays correct for any batch or crop size.
      self.log_sum.assign_add(tf.reduce_sum(diff))
      self.count.assign_add(tf.cast(tf.size(diff), 'float32'))

    def result(self):
      return tf.math.divide_no_nan(self.log_sum, self.count)

    def reset_state(self):
      self.log_sum.assign(0.)
      self.count.assign(0.)

    def reset_states(self):
      # Older spelling of reset_state(), kept for callers that still use it.
      self.reset_state()

In [None]:
class ThresAcc(tf.keras.metrics.Metric):
  """Threshold accuracy: share of elements with max(y_true/y_pred, y_pred/y_true) < 1.25 ** pow.

  The share is taken over every element passed to update_state since the last
  reset. The metric name is `name` followed by str(pow).
  `sample_weight` is accepted for API compatibility but ignored.
  """
  def __init__(self, pow, name='acc', **kwargs):
    name = name + str(pow)
    super(ThresAcc, self).__init__(name=name, **kwargs)
    self.thres = tf.math.pow(tf.constant([1.25]), tf.constant([pow]))
    self.acc_sum = self.add_weight(name='as', initializer='zeros')
    self.count = self.add_weight(name='ct', initializer='zeros')

  def update_state(self, y_true, y_pred, sample_weight=None):
    y_pred = tf.cast(tf.reshape(y_pred, shape=(-1, 1)), 'float32')
    y_true = tf.cast(tf.reshape(y_true, shape=(-1, 1)), 'float32')
    ratio = tf.math.maximum(tf.math.divide(y_true, y_pred), tf.math.divide(y_pred, y_true))
    mask = tf.math.less(ratio, self.thres)
    self.acc_sum.assign_add(tf.reduce_sum(tf.cast(mask, 'float32')))
    # Count the elements actually seen instead of assuming
    # BATCH_SIZE * width * height from the notebook-level globals.
    self.count.assign_add(tf.cast(tf.size(mask), 'float32'))

  def result(self):
    return tf.math.divide_no_nan(self.acc_sum, self.count)

  def reset_state(self):
    self.acc_sum.assign(0.)
    self.count.assign(0.)

  def reset_states(self):
    # Older spelling of reset_state(), kept for callers that still use it.
    self.reset_state()


In [None]:
# SGD with momentum, Huber loss, and the depth metrics defined above:
# RMS error, relative error, log10 error and three threshold accuracies.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=.005, momentum=.9),
    loss=tf.keras.losses.Huber(),
    metrics=[
        tf.keras.metrics.RootMeanSquaredError(name='rms'),
        RelError(),
        LogError(),
        ThresAcc(1.),
        ThresAcc(2.),
        ThresAcc(3.),
    ])

In [None]:
# Draw the layer graph with tensor shapes; expand_nested also expands the nested downStack model.
tf.keras.utils.plot_model(model, show_shapes=True, expand_nested=True)

# Train Model

In [None]:
# Optionally resume from '<loadName>/cp.ckpt'; an empty answer keeps the current weights.
loadName = input()
if loadName:
  model.load_weights(loadName + '/cp.ckpt')

In [None]:
# Wipe any earlier logs (rm prints an error if the folder does not exist yet),
# then recreate the directory the TensorBoard callback writes to.
!rm -r ./logs
os.makedirs('./logs/fit')

In [None]:
def display(dList):
  """Show the arrays in `dList` side by side in one row and return the figure.

  Every array is drawn with its first two axes swapped. A colorbar is attached
  to the right of the last panel. `dList` itself is left untouched.

  Args:
    dList: sequence of arrays accepted by plt.imshow once axes 0 and 1 are swapped.

  Returns:
    The matplotlib figure that was drawn.
  """
  fig = plt.figure()
  nPanels = len(dList)
  for i, panel in enumerate(dList):
    plt.subplot(1, nPanels, i + 1)
    # Swap into a temporary rather than writing back into dList, so showing
    # the same list twice does not flip the images back.
    plt.imshow(np.swapaxes(panel, 0, 1))
    plt.axis('off')
  # Give the colorbar its own axes, 5% of the last panel's width.
  divider = make_axes_locatable(plt.gca())
  cax = divider.append_axes("right", size="5%", pad=0.05)
  plt.colorbar(cax=cax)
  plt.show()
  return fig

In [None]:
# Fixed preview sample (ID 1311): load, then strip the border on the first two axes.
sampleImg = np.load('imgs/img' + str(1311) + '.npy')[border:-border, border:-border, ...]
sampleDep = np.load('deps/dep' + str(1311) + '.npy')[border:-border, border:-border, ...]

# Downscale to 60% and keep the top-left width x height window.
sampleImg = cv2.resize(sampleImg, (0, 0), fx=.6, fy=.6)[0:width, 0:height, ...]
sampleDep = cv2.resize(sampleDep, (0, 0), fx=.6, fy=.6)[0:width, 0:height, ...]

In [None]:
def predict():
  """Plot the current sample, its ground-truth depth and the model's prediction.

  Reads the globals `sampleImg`, `sampleDep` and `model`, and returns the
  figure produced by display().
  """
  rgb = sampleImg.astype('uint8')
  # Add a batch axis for the model, then take the single prediction's only channel.
  batch = sampleImg[tf.newaxis, ...]
  predictedDep = model.predict(batch)[0, :, :, 0]
  return display([rgb, sampleDep, predictedDep])

In [None]:
class DisplayCallback(tf.keras.callbacks.Callback):
  """Keras callback that redraws the sample prediction after every epoch."""
  def on_epoch_end(self, epoch, logs=None):
    """Clear the cell output, then call predict() to plot the current prediction."""
    # wait=True delays the clearing until new output is available to replace it
    clear_output(wait=True)
    predict()

In [None]:
# Run name; an empty answer falls back to the default 'checkpoints' folder.
name = input()
ckptPath = name + '/cp.ckpt' if name != '' else 'checkpoints/cp.ckpt'

# Save weights only, once per epoch, and only when val_loss reaches a new minimum.
checkpointCallback = tf.keras.callbacks.ModelCheckpoint(
    ckptPath,
    monitor='val_loss',
    mode='min',
    verbose=1,
    save_freq='epoch',
    save_weights_only=True,
    save_best_only=True)

In [None]:
# Log under the run name when one was given, otherwise under a timestamp.
logPath = 'logs/fit/' + (name if name != ''
                         else datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tensorboardCallback = tf.keras.callbacks.TensorBoard(log_dir=logPath, histogram_freq=1)

In [None]:
# Multiply the learning rate by 0.3 after 5 epochs without val_loss improvement,
# never going below 1e-4.
reduceLR = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                factor=.3,
                                                patience=5,
                                                min_lr=.0001)

In [None]:
# Number of epochs is typed in at run time.
EPOCHS = int(input())

# Keras documents steps_per_epoch / validation_steps as integers, but
# m / BATCH_SIZE - 1 is a float (39.75 with the default constants).
# Rounding up yields the smallest integer bound that is >= the float value.
# NOTE(review): presumably this matches the number of batches the float bound
# used to produce - confirm against the TensorFlow version in use.
STEPS_PER_EPOCH = int(np.ceil(m / BATCH_SIZE - 1))
VAL_STEPS = int(np.ceil((mv - m) / BATCH_SIZE - 1))

hist = model.fit(trainGen,
                 epochs=EPOCHS,
                 steps_per_epoch=STEPS_PER_EPOCH,
                 validation_steps=VAL_STEPS,
                 validation_data=valGen,
                 callbacks=[reduceLR, DisplayCallback(), checkpointCallback, tensorboardCallback])

# Evaluate Model

In [None]:
# Open TensorBoard on the directory the training callback logs to.
%tensorboard --logdir logs/fit

In [None]:
# Evaluate on the long validation generator. Type a run name to load
# '<valName>/cp.ckpt'; an empty answer loads the checkpoint of the current run.
valName = input()
if valName != '':
  model.load_weights(valName + '/cp.ckpt')
else:
  # ckptPath is exactly where the checkpoint callback saved. Rebuilding it as
  # name + '/cp.ckpt' pointed at '/cp.ckpt' whenever the run name was left
  # empty, while the weights were written to 'checkpoints/cp.ckpt'.
  model.load_weights(ckptPath)

res = model.evaluate(longValGen)

In [None]:
# Plot the sample prediction; the trailing ';' keeps the returned figure from being echoed.
predict();

In [None]:
# Evaluate on the test generator. Type a run name to load
# '<testName>/cp.ckpt'; an empty answer loads the checkpoint of the current run.
testName = input()
if testName != '':
  model.load_weights(testName + '/cp.ckpt')
else:
  # ckptPath is exactly where the checkpoint callback saved. Rebuilding it as
  # name + '/cp.ckpt' pointed at '/cp.ckpt' whenever the run name was left
  # empty, while the weights were written to 'checkpoints/cp.ckpt'.
  model.load_weights(ckptPath)

res = model.evaluate(testGen)

In [None]:
# Plot a prediction for each sample in a fixed ID range. predict() reads the
# globals sampleImg / sampleDep, so they are rebound on every iteration.
# NOTE(review): the test generator's IDs start at 1376 (mv from the constants
# cell) while this range starts at 1377 - confirm that skipping 1376 is intended.
for i in range(1377, 1449):
  # load and strip the border on the first two axes
  sampleImg = np.load('imgs/img' + str(i) + '.npy')[border:-border, border:-border, ...]
  sampleDep = np.load('deps/dep' + str(i) + '.npy')[border:-border, border:-border, ...]

  # downscale to 50% and keep the top-left width x height window
  sampleImg = cv2.resize(sampleImg, (0, 0), fx=.5, fy=.5)[0:width, 0:height, ...]
  sampleDep = cv2.resize(sampleDep, (0, 0), fx=.5, fy=.5)[0:width, 0:height, ...]

  predict();

In [None]:
# Archive the 'opt-100' folder (presumably a run's checkpoint directory) for download.
!zip -r opt-100.zip opt-100