## 1. Check all functions in helpers.py

In [1]:
import cv2
import numpy as np
import itertools
import operator
import os, csv
import tensorflow as tf

import time, datetime

### 1.1. [class_names, label_values] = get_label_info(csv_path)

In [2]:
def get_label_info(csv_path):
    """
    Retrieve the class names and label values for the selected dataset.
    Must be in CSV format!

    # Arguments
        csv_path: The file path of the class dictionairy
        
    # Returns
        Two lists: one for the class names and the other for the label values
    """
    filename, file_extension = os.path.splitext(csv_path)
    if not file_extension == ".csv":
        return ValueError("File is not a CSV!")

    class_names = []
    label_values = []
    with open(csv_path, 'r') as csvfile:
        file_reader = csv.reader(csvfile, delimiter=',')
        header = next(file_reader)
        for row in file_reader:
            class_names.append(row[0])
            label_values.append([int(row[1]), int(row[2]), int(row[3])])
        # print(class_dict)
    return class_names, label_values

Test the function

In [3]:
csv_path = "E:\\Python Workspace\\Semantic-Segmentation-Suite\\CamVid\\class_dict.csv"
class_names, label_values = get_label_info(csv_path)

In [4]:
for name, value in zip(class_names, label_values):
    print(name, value)

Animal [64, 128, 64]
Archway [192, 0, 128]
Bicyclist [0, 128, 192]
Bridge [0, 128, 64]
Building [128, 0, 0]
Car [64, 0, 128]
CartLuggagePram [64, 0, 192]
Child [192, 128, 64]
Column_Pole [192, 192, 128]
Fence [64, 64, 128]
LaneMkgsDriv [128, 0, 192]
LaneMkgsNonDriv [192, 0, 64]
Misc_Text [128, 128, 64]
MotorcycleScooter [192, 0, 192]
OtherMoving [128, 64, 64]
ParkingBlock [64, 192, 128]
Pedestrian [64, 64, 0]
Road [128, 64, 128]
RoadShoulder [128, 128, 192]
Sidewalk [0, 0, 192]
SignSymbol [192, 128, 128]
Sky [128, 128, 128]
SUVPickupTruck [64, 128, 192]
TrafficCone [0, 0, 64]
TrafficLight [0, 64, 64]
Train [192, 64, 128]
Tree [128, 128, 0]
Truck_Bus [192, 128, 192]
Tunnel [64, 0, 64]
VegetationMisc [192, 192, 0]
Void [0, 0, 0]
Wall [64, 192, 0]


### 1.2. semantic_map = one_hot_it(label, label_values)

In [5]:
def one_hot_it(label, label_values):
    """
    Convert a segmentation image label array to one-hot format
    by replacing each pixel value with a vector of length num_classes

    # Arguments
        label: The 2D array segmentation image label
        label_values
        
    # Returns
        A 2D array with the same width and hieght as the input, but
        with a depth size of num_classes
    """
    # st = time.time()
    # w = label.shape[0]
    # h = label.shape[1]
    # num_classes = len(class_dict)
    # x = np.zeros([w,h,num_classes])
    # unique_labels = sortedlist((class_dict.values()))
    # for i in range(0, w):
    #     for j in range(0, h):
    #         index = unique_labels.index(list(label[i][j][:]))
    #         x[i,j,index]=1
    # print("Time 1 = ", time.time() - st)

    # st = time.time()
    # https://stackoverflow.com/questions/46903885/map-rgb-semantic-maps-to-one-hot-encodings-and-vice-versa-in-tensorflow
    # https://stackoverflow.com/questions/14859458/how-to-check-if-all-values-in-the-columns-of-a-numpy-matrix-are-the-same
    semantic_map = []
    for colour in label_values:
        # colour_map = np.full((label.shape[0], label.shape[1], label.shape[2]), colour, dtype=int)
        equality = np.equal(label, colour)
        class_map = np.all(equality, axis = -1)
        semantic_map.append(class_map)
    semantic_map = np.stack(semantic_map, axis=-1)
    # print("Time 2 = ", time.time() - st)

    return semantic_map

Test the function

In [132]:
#label = np.array([[0,0,1],[2,0,1],[0,3,1]])
#label_values = np.array([0,1,2,3])
#semantic_map = one_hot_it(label, label_values)
label = np.array([[[0,0,0],[0,0,0],[0,0,1]],[[0,1,0],[0,0,0],[0,0,1]],[[0,0,0],[0,1,1],[0,0,1]]])
label_values = np.array([[0,0,0],[0,0,1],[0,1,0],[0,1,1]])
semantic_map = one_hot_it(label, label_values)

In [144]:
print(label.shape)
print(semantic_map.shape)

(3, 3, 3)
(3, 3, 3, 4)


The original function does not work properly. I have to correct it as follows:

In [151]:
def one_hot_it(label, label_values):
    """
    Convert a segmentation image label array to one-hot format
    by replacing each pixel value with a vector of length num_classes

    # Arguments
        label: The 2D array segmentation image label
        label_values
        
    # Returns
        A 3D array with the same width and hieght as the input, but
        with a depth size of num_classes
    """
    semantic_map = np.zeros([label.shape[0],label.shape[1],label_values.shape[0]])
    for i_colour in range(label_values.shape[0]):
        colour = label_values[i_colour]
        equality = np.zeros_like(label,dtype=bool)
        for i_channel in range(colour.shape[0]):
            equality[:,:,i_channel] = np.equal(label[:,:,i_channel], colour[i_channel])
        semantic_map[:,:,i_colour] = np.all(equality, axis = -1)
        
    semantic_map = semantic_map.astype(int)

    return semantic_map

In [152]:
colour.shape[0]

3

In [153]:
label_values.shape

(4, 3)

Test the function

In [154]:
#label = np.array([[0,0,3],[6,0,3],[0,9,3]])
#label_values = np.array([0,3,6,9])
label = np.array([[[0,0,0],[0,0,0],[0,0,1]],[[0,1,0],[0,0,0],[0,0,1]],[[0,0,0],[0,1,1],[0,0,1]]])
label_values = np.array([[0,0,0],[0,0,1],[0,1,0],[0,1,1]])
semantic_map = one_hot_it(label, label_values)

In [157]:
print('label: \n',label)
print('semantic_map: \n',semantic_map)

label: 
 [[[0 0 0]
  [0 0 0]
  [0 0 1]]

 [[0 1 0]
  [0 0 0]
  [0 0 1]]

 [[0 0 0]
  [0 1 1]
  [0 0 1]]]
semantic_map: 
 [[[1 0 0 0]
  [1 0 0 0]
  [0 1 0 0]]

 [[0 0 1 0]
  [1 0 0 0]
  [0 1 0 0]]

 [[1 0 0 0]
  [0 0 0 1]
  [0 1 0 0]]]


### 1.3. x = reverse_one_hot(image)

In [11]:
def reverse_one_hot(semantic_map):
    """
    Transform a 3D array in one-hot format (depth is num_classes),
    to a 2D array with only 1 channel, where each pixel value is
    the classified class key.

    # Arguments
        image: The one-hot format image 
        
    # Returns
        A 2D array with the same width and hieght as the input, but
        with a depth size of 1, where each pixel value is the classified 
        class key.
    """
    x = np.argmax(semantic_map, axis = -1)
    return x

Test the function

In [12]:
key_map = reverse_one_hot(semantic_map)

In [13]:
print(key_map)

[[0 0 1]
 [2 0 1]
 [0 3 1]]


### 1.4. x = colour_code_segmentation(image, label_values)

In [14]:
def colour_code_segmentation(key_map, label_values):
    """
    Given a 1-channel array of class keys, colour code the segmentation results.

    # Arguments
        image: single channel array where each value represents the class key.
        label_values
        
    # Returns
        Colour coded image for segmentation visualization
    """
    
    colour_codes = np.array(label_values)
    x = colour_codes[key_map.astype(int)]

    return x

Test the function

In [15]:
x = colour_code_segmentation(key_map, label_values)

In [16]:
print(x)

[[0 0 3]
 [6 0 3]
 [0 9 3]]


## 2. Check all functions in utils.py

In [17]:
from __future__ import print_function
from __future__ import division
import os,time,cv2, sys, math
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import time, datetime
import os, random
from scipy.misc import imread
import ast
from sklearn.metrics import precision_score, \
    recall_score, confusion_matrix, classification_report, \
    accuracy_score, f1_score

  from ._conv import register_converters as _register_converters


### 2.1. file_name = filepath_to_name(full_name)

In [18]:
# Takes an absolute file path and returns the name of the file without th extension
def filepath_to_name(full_name):
    file_name = os.path.basename(full_name)
    file_name = os.path.splitext(file_name)[0]
    return file_name

Test the function

In [19]:
full_name = "E:\\Python Workspace\\Semantic-Segmentation-Suite\\CamVid\\class_dict.csv"
print(full_name)
print('Base Name: ',os.path.basename(full_name))
print(os.path.splitext(os.path.basename(full_name)))
print(filepath_to_name(full_name))

E:\Python Workspace\Semantic-Segmentation-Suite\CamVid\class_dict.csv
Base Name:  class_dict.csv
('class_dict', '.csv')
class_dict


### 2.2. LOG(X, f=None)

In [20]:
# Print with time. To console or file
def LOG(X, f=None):
    time_stamp = datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    if not f:
        print(time_stamp + " " + X)
    else:
        f.write(time_stamp + " " + X)

Test the function

In [21]:
X = 'test information'
LOG(X, f=None)

[2018-05-04 21:05:02] test information


### 2.3. count_params()

In [22]:
# Count total number of trainable parameters in the model
def count_params():
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("This model has %d trainable parameters"% (total_parameters))

### 2.4. output = mean_image_subtraction(inputs, means=[123.68, 116.78, 103.94])

In [23]:
# Subtracts the mean images from ImageNet
def mean_image_subtraction(inputs, means=[123.68, 116.78, 103.94]):
    inputs=tf.to_float(inputs)
    # Get the last dim of inputs
    num_channels = inputs.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')
    # split the image to independent channels
    channels = tf.split(axis=3, num_or_size_splits=num_channels, value=inputs)
    for i in range(num_channels):
        channels[i] -= means[i]
    return tf.concat(axis=3, values=channels)

Test the function

In [24]:
tf.reset_default_graph()
graph = tf.get_default_graph()
sess = tf.Session(graph=graph)

inputs = tf.constant([[[[0,100,200],[25,125,225]],[[50,150,250],[75,175,275]]],])
# inputs = tf.constant([[[[0,100],[25,125]],[[50,150],[75,175]]],])
inputs=tf.to_float(inputs)
print(sess.run(inputs))
num_channels = inputs.get_shape().as_list()[-1]
print(inputs.get_shape().as_list())
print(num_channels)
means=[123.68, 116.78, 103.94]
print(len(means))
channels = tf.split(axis=3, num_or_size_splits=num_channels, value=inputs)
print(sess.run(channels[0]))
print(sess.run(channels[1]))
print(sess.run(channels[2]))
print(sess.run(mean_image_subtraction(inputs, means=[123.68, 116.78, 103.94])))

sess.close()

[[[[  0. 100. 200.]
   [ 25. 125. 225.]]

  [[ 50. 150. 250.]
   [ 75. 175. 275.]]]]
[1, 2, 2, 3]
3
3
[[[[ 0.]
   [25.]]

  [[50.]
   [75.]]]]
[[[[100.]
   [125.]]

  [[150.]
   [175.]]]]
[[[[200.]
   [225.]]

  [[250.]
   [275.]]]]
[[[[-123.68      -16.779999   96.06    ]
   [ -98.68        8.220001  121.06    ]]

  [[ -73.68       33.22      146.06    ]
   [ -48.68       58.22      171.06    ]]]]


### 2.5 cropped_image = random_crop(image, label, crop_height, crop_width)

In [25]:
# Randomly crop the image to a specific size. For data augmentation
def random_crop(image, label, crop_height, crop_width):
    if (image.shape[0] != label.shape[0]) or (image.shape[1] != label.shape[1]):
        raise Exception('Image and label must have the same dimensions!')
        
    if (crop_width <= image.shape[1]) and (crop_height <= image.shape[0]):
        x = random.randint(0, image.shape[1]-crop_width)
        y = random.randint(0, image.shape[0]-crop_height)
        
        if len(label.shape) == 3:
            return image[y:y+crop_height, x:x+crop_width, :], label[y:y+crop_height, x:x+crop_width, :]
        else:
            return image[y:y+crop_height, x:x+crop_width, :], label[y:y+crop_height, x:x+crop_width]
    else:
        raise Exception('Crop shape exceeds image dimensions!')

This function applies only for images not for tensors!

In [26]:
image = np.array([[1,2,3],[4,5,6],[7,8,9]])
label = np.array([[1,1,0],[1,0,1],[0,1,1]])
cropped_image = random_crop(image, label, 2, 2)
print(cropped_image)

IndexError: too many indices for array

The function has problem. I rewrite the code as follows:

In [27]:
# Randomly crop the image to a specific size. For data augmentation
def random_crop(image, label, crop_height, crop_width):
    if (image.shape[0] != label.shape[0]) or (image.shape[1] != label.shape[1]):
        raise Exception('Image and label must have the same dimensions!')
        
    if (crop_width <= image.shape[1]) and (crop_height <= image.shape[0]):
        x = random.randint(0, image.shape[1]-crop_width)
        y = random.randint(0, image.shape[0]-crop_height)
        
        if len(label.shape) == 3:
            return image[y:y+crop_height, x:x+crop_width, :], label[y:y+crop_height, x:x+crop_width, :]
        else:
            return image[y:y+crop_height, x:x+crop_width], label[y:y+crop_height, x:x+crop_width]
    else:
        raise Exception('Crop shape exceeds image dimensions!')

In [28]:
image = np.array([[1,2,3],[4,5,6],[7,8,9]])
label = np.array([[1,1,0],[1,0,1],[0,1,1]])
cropped_image = random_crop(image, label, 2, 2)
print(image)
print(label)
print(cropped_image)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 1 0]
 [1 0 1]
 [0 1 1]]
(array([[2, 3],
       [5, 6]]), array([[1, 0],
       [0, 1]]))


### 2.6 global_accuracy = compute_global_accuracy(pred, label)

In [29]:
# Compute the average segmentation accuracy across all classes
def compute_global_accuracy(pred, label):
    total = len(label)
    count = 0.0
    for i in range(total):
        if pred[i] == label[i]:
            count = count + 1.0
    return float(count) / float(total)

This definition only applies to 1d vector. So, I modified this as follows:

In [30]:
# Compute the average segmentation accuracy across all classes
def compute_global_accuracy(pred, label):
    f_pred = pred.flatten()
    f_label = label.flatten()
    count = np.sum(f_pred==f_label)
    return float(count) / float(len(f_pred))

In [31]:
pred = np.array([[1,0,1],[1,0,1],[0,1,1]])
label = np.array([[0,1,0],[1,0,1],[0,1,1]])
global_accuracy = compute_global_accuracy(pred, label)
print(global_accuracy)

0.6666666666666666


### 2.7 accuracies = compute_class_accuracies(pred, label, num_classes)

In [32]:
# Compute the class-specific segmentation accuracy
def compute_class_accuracies(pred, label, num_classes):
    total = []
    for val in range(num_classes):
        total.append((label == val).sum())

    count = [0.0] * num_classes
    for i in range(len(label)):
        if pred[i] == label[i]:
            count[int(pred[i])] = count[int(pred[i])] + 1.0

    # If there are no pixels from a certain class in the GT, 
    # it returns NAN because of divide by zero
    # Replace the nans with a 1.0.
    accuracies = []
    for i in range(len(total)):
        if total[i] == 0:
            accuracies.append(1.0)
        else:
            accuracies.append(count[i] / total[i])

    return accuracies

This definition only applies to 1d vector. So, I modified this as follows:

In [33]:
# Compute the class-specific segmentation accuracy
def compute_class_accuracies(pred, label, num_classes):
    pred = pred.flatten()
    label = label.flatten()
    total = []
    for val in range(num_classes):
        total.append((label == val).sum())

    count = [0.0] * num_classes
    for i in range(len(label)):
        if pred[i] == label[i]:
            count[int(pred[i])] = count[int(pred[i])] + 1.0

    # If there are no pixels from a certain class in the GT, 
    # it returns NAN because of divide by zero
    # Replace the nans with a 1.0.
    accuracies = []
    for i in range(len(total)):
        if total[i] == 0:
            accuracies.append(1.0)
        else:
            accuracies.append(count[i] / total[i])

    return accuracies

In [34]:
print(compute_class_accuracies(pred, label, 2))

[0.5, 0.8]


### 2.8 mean_iou = compute_mean_iou(pred, label)

In [35]:
def compute_mean_iou(pred, label):

    unique_labels = np.unique(label)
    num_unique_labels = len(unique_labels);

    I = np.zeros(num_unique_labels)
    U = np.zeros(num_unique_labels)

    for index, val in enumerate(unique_labels):
        pred_i = pred == val
        label_i = label == val

        I[index] = float(np.sum(np.logical_and(label_i, pred_i)))
        U[index] = float(np.sum(np.logical_or(label_i, pred_i)))


    mean_iou = np.mean(I / U)
    return mean_iou

This definition only applies to 1d vector. So, I modified this as follows:

In [36]:
def compute_mean_iou(pred, label):
    pred = pred.flatten()
    label = label.flatten()
    
    unique_labels = np.unique(label)
    num_unique_labels = len(unique_labels);

    I = np.zeros(num_unique_labels)
    U = np.zeros(num_unique_labels)

    for index, val in enumerate(unique_labels):
        pred_i = pred == val
        label_i = label == val

        I[index] = float(np.sum(np.logical_and(label_i, pred_i)))
        U[index] = float(np.sum(np.logical_or(label_i, pred_i)))

    mean_iou = np.mean(I / U)
    return mean_iou

In [37]:
print(compute_mean_iou(pred, label))

0.4857142857142857


### 2.9 evaluate_segmentation(pred, label, num_classes, score_averaging="weighted")

In [38]:
def evaluate_segmentation(pred, label, num_classes, score_averaging="weighted"):
    flat_pred = pred.flatten()
    flat_label = label.flatten()

    global_accuracy = compute_global_accuracy(flat_pred, flat_label)
    class_accuracies = compute_class_accuracies(flat_pred, flat_label, num_classes)

    prec = precision_score(flat_pred, flat_label, average=score_averaging)
    rec = recall_score(flat_pred, flat_label, average=score_averaging)
    f1 = f1_score(flat_pred, flat_label, average=score_averaging)

    iou = compute_mean_iou(flat_pred, flat_label)

    return global_accuracy, class_accuracies, prec, rec, f1, iou

In [39]:
evaluate_segmentation(pred, label, 2, score_averaging="weighted")

(0.6666666666666666,
 [0.5, 0.8],
 0.7000000000000001,
 0.6666666666666666,
 0.6753246753246753,
 0.4857142857142857)

### 2.10 compute_class_weights(labels_dir, label_values)

In [40]:
def compute_class_weights(labels_dir, label_values):
    '''
    Arguments:
        labels_dir(list): Directory where the image segmentation labels are
        num_classes(int): the number of classes of pixels in all images

    Returns:
        class_weights(list): a list of class weights where each index represents each class label and the element is the class weight for that label.

    '''
    image_files = [os.path.join(labels_dir, file) for file in os.listdir(labels_dir) if file.endswith('.png')]

    num_classes = len(label_values)

    class_pixels = np.zeros(num_classes) 

    total_pixels = 0.0

    for n in range(len(image_files)):
        image = imread(image_files[n])

        for index, colour in enumerate(label_values):
            class_map = np.all(np.equal(image, colour), axis = -1)
            class_map = class_map.astype(np.float32)
            class_pixels[index] += np.sum(class_map)

            
        print("\rProcessing image: " + str(n) + " / " + str(len(image_files)), end="")
        sys.stdout.flush()

    total_pixels = float(np.sum(class_pixels))
    index_to_delete = np.argwhere(class_pixels==0.0)
    class_pixels = np.delete(class_pixels, index_to_delete)

    class_weights = total_pixels / class_pixels
    class_weights = class_weights / np.sum(class_weights)

    return class_weights

### 2.11 memory()

In [41]:
import os
import psutil

In [42]:
# Compute the memory usage, for debugging
def memory():
    pid = os.getpid()
    py = psutil.Process(pid)
    memoryUse = py.memory_info()[0]/2.**30  # Memory use in GB
    print('Memory usage in GBs:', memoryUse)

In [43]:
pid = os.getpid()
print(pid)

12148


In [44]:
py = psutil.Process(pid)
print(py)

psutil.Process(pid=12148, name='python.exe', started='21:00:46')


In [45]:
print(py.memory_info())

pmem(rss=208134144, vms=184291328, num_page_faults=59173, peak_wset=208134144, wset=208134144, peak_paged_pool=549824, paged_pool=549552, peak_nonpaged_pool=1161536, nonpaged_pool=156408, pagefile=184291328, peak_pagefile=184291328, private=184291328)


In [46]:
print(py.memory_info()[0]/2.**30)

0.19385147094726562


In [47]:
memory()

Memory usage in GBs: 0.19385528564453125


## Check all functions in main.py

In [48]:
from __future__ import print_function
import os,time,cv2, sys, math
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import time, datetime
import argparse
import random
import os, sys
import subprocess


# import helpers 
# import utils 

import matplotlib.pyplot as plt

sys.path.append("models")
from FC_DenseNet_Tiramisu import build_fc_densenet
from Encoder_Decoder import build_encoder_decoder
from RefineNet import build_refinenet
from FRRN import build_frrn
from MobileUNet import build_mobile_unet
from PSPNet import build_pspnet
from GCN import build_gcn
from DeepLabV3 import build_deeplabv3
from DeepLabV3_plus import build_deeplabv3_plus
from AdapNet import build_adaptnet

### 3.1 str2bool(v)

In [49]:
def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

In [50]:
print(str2bool('yes'))
print(str2bool('no'))
print(str2bool('Yes'))
print(str2bool('Fool'))

True
False
True


ArgumentTypeError: Boolean value expected.

### 3.2 argparse

In [51]:
parser = argparse.ArgumentParser()
parser.add_argument('--num_epochs', type=int, default=1, help='Number of epochs to train for')
parser.add_argument('--mode', type=str, default="train", help='Select "train", "test", or "predict" mode. \
    Note that for prediction mode you have to specify an image to run the model on.')
parser.add_argument('--class_balancing', type=str2bool, default=False, help='Whether to use median frequency class weights to balance the classes in the loss')
parser.add_argument('--image', type=str, default=None, help='The image you want to predict on. Only valid in "predict" mode.')
parser.add_argument('--continue_training', type=str2bool, default=False, help='Whether to continue training from a checkpoint')
parser.add_argument('--dataset', type=str, default="CamVid", help='Dataset you are using.')
parser.add_argument('--crop_height', type=int, default=256, help='Height of cropped input image to network')
parser.add_argument('--crop_width', type=int, default=256, help='Width of cropped input image to network')
parser.add_argument('--batch_size', type=int, default=1, help='Number of images in each batch')
parser.add_argument('--num_val_images', type=int, default=10, help='The number of images to used for validations')
parser.add_argument('--h_flip', type=str2bool, default=False, help='Whether to randomly flip the image horizontally for data augmentation')
parser.add_argument('--v_flip', type=str2bool, default=False, help='Whether to randomly flip the image vertically for data augmentation')
parser.add_argument('--brightness', type=float, default=None, help='Whether to randomly change the image brightness for data augmentation. Specifies the max bightness change.')
parser.add_argument('--rotation', type=float, default=None, help='Whether to randomly rotate the image for data augmentation. Specifies the max rotation angle.')
parser.add_argument('--model', type=str, default="MobileUNet-Skip", help='The model you are using. Currently supports:\
    FC-DenseNet56, FC-DenseNet67, FC-DenseNet103, Encoder-Decoder, Encoder-Decoder-Skip, RefineNet-Res50, RefineNet-Res101, RefineNet-Res152, \
    FRRN-A, FRRN-B, MobileUNet, MobileUNet-Skip, PSPNet-Res50, PSPNet-Res101, PSPNet-Res152, GCN-Res50, GCN-Res101, GCN-Res152, DeepLabV3-Res50 \
    DeepLabV3-Res101, DeepLabV3-Res152, DeepLabV3_plus-Res50, DeepLabV3_plus-Res101, DeepLabV3_plus-Res152, AdapNet, custom')

_StoreAction(option_strings=['--model'], dest='model', nargs=None, const=None, default='MobileUNet-Skip', type=<class 'str'>, choices=None, help='The model you are using. Currently supports:    FC-DenseNet56, FC-DenseNet67, FC-DenseNet103, Encoder-Decoder, Encoder-Decoder-Skip, RefineNet-Res50, RefineNet-Res101, RefineNet-Res152,     FRRN-A, FRRN-B, MobileUNet, MobileUNet-Skip, PSPNet-Res50, PSPNet-Res101, PSPNet-Res152, GCN-Res50, GCN-Res101, GCN-Res152, DeepLabV3-Res50     DeepLabV3-Res101, DeepLabV3-Res152, DeepLabV3_plus-Res50, DeepLabV3_plus-Res101, DeepLabV3_plus-Res152, AdapNet, custom', metavar=None)

In [52]:
args = parser.parse_args('')

In [53]:
print(args)

Namespace(batch_size=1, brightness=None, class_balancing=False, continue_training=False, crop_height=256, crop_width=256, dataset='CamVid', h_flip=False, image=None, mode='train', model='MobileUNet-Skip', num_epochs=1, num_val_images=10, rotation=None, v_flip=False)


In [54]:
args.crop_height

256

### 3.3 prepare_data(dataset_dir=args.dataset)

In [55]:
# Get a list of the training, validation, and testing file paths
def prepare_data(dataset_dir=args.dataset):
    train_input_names=[]
    train_output_names=[]
    val_input_names=[]
    val_output_names=[]
    test_input_names=[]
    test_output_names=[]
    for file in os.listdir(dataset_dir + "/train"):
        cwd = os.getcwd()
        train_input_names.append(cwd + "/" + dataset_dir + "/train/" + file)
    for file in os.listdir(dataset_dir + "/train_labels"):
        cwd = os.getcwd()
        train_output_names.append(cwd + "/" + dataset_dir + "/train_labels/" + file)
    for file in os.listdir(dataset_dir + "/val"):
        cwd = os.getcwd()
        val_input_names.append(cwd + "/" + dataset_dir + "/val/" + file)
    for file in os.listdir(dataset_dir + "/val_labels"):
        cwd = os.getcwd()
        val_output_names.append(cwd + "/" + dataset_dir + "/val_labels/" + file)
    for file in os.listdir(dataset_dir + "/test"):
        cwd = os.getcwd()
        test_input_names.append(cwd + "/" + dataset_dir + "/test/" + file)
    for file in os.listdir(dataset_dir + "/test_labels"):
        cwd = os.getcwd()
        test_output_names.append(cwd + "/" + dataset_dir + "/test_labels/" + file)
    train_input_names.sort(),train_output_names.sort(), val_input_names.sort(), val_output_names.sort(), test_input_names.sort(), test_output_names.sort()
    return train_input_names,train_output_names, val_input_names, val_output_names, test_input_names, test_output_names

This function is used to prepare the paths of the data

In [56]:
dataset_dir = 'CamVid'

In [57]:
os.listdir(dataset_dir + "/train")[:5]

['0001TP_006690.png',
 '0001TP_006720.png',
 '0001TP_006750.png',
 '0001TP_006780.png',
 '0001TP_006810.png']

In [58]:
os.getcwd()

'E:\\Python Workspace\\Semantic-Segmentation-Suite'

In [59]:
os.getcwd() + "/" + dataset_dir + "/train/" + os.listdir(dataset_dir + "/train")[0]

'E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/train/0001TP_006690.png'

Note that the above notation is compatible with linux platform but not with Windows platform!

In [60]:
train_input_names,train_output_names, val_input_names, val_output_names, test_input_names, test_output_names = prepare_data(dataset_dir)

In [61]:
val_input_names[:5]

['E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/val/0001TP_006870.png',
 'E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/val/0001TP_006900.png',
 'E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/val/0001TP_006930.png',
 'E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/val/0001TP_006960.png',
 'E:\\Python Workspace\\Semantic-Segmentation-Suite/CamVid/val/0001TP_007530.png']

### 3.4 load_image(path)

In [62]:
def load_image(path):
    image = cv2.cvtColor(cv2.imread(path,-1), cv2.COLOR_BGR2RGB)
    return image

In [63]:
cv2.imshow('test',load_image(val_input_names[0]))
cv2.waitKey(-1)
cv2.destroyAllWindows()

In [64]:
load_image(val_input_names[0]).shape

(720, 960, 3)

In [65]:
load_image(val_input_names[0]).dtype

dtype('uint8')

### 3.5 data_augmentation(input_image, output_image)

In [66]:
def data_augmentation(input_image, output_image):
    # Data augmentation
    input_image, output_image = utils.random_crop(input_image, output_image, args.crop_height, args.crop_width)

    if args.h_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 1)
        output_image = cv2.flip(output_image, 1)
    if args.v_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 0)
        output_image = cv2.flip(output_image, 0)
    if args.brightness:
        factor = random.uniform(-1*args.brightness, args.brightness)
        table = np.array([((i / 255.0) ** factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8)
        input_image = cv2.LUT(input_image, table)
    if args.rotation:
        angle = random.uniform(-1*args.rotation, args.rotation)
    if args.rotation:
        M = cv2.getRotationMatrix2D((input_image.shape[1]//2, input_image.shape[0]//2), angle, 1.0)
        input_image = cv2.warpAffine(input_image, M, (input_image.shape[1], input_image.shape[0]), flags=INTER_NEAREST)
        output_image = cv2.warpAffine(output_image, M, (output_image.shape[1], output_image.shape[0]), flags=INTER_NEAREST)

    return input_image, output_image

In [67]:
input_raw = load_image(val_input_names[0])
output_raw = load_image(val_output_names[0])
print(input_raw.shape)
print(output_raw.shape)

(720, 960, 3)
(720, 960, 3)


In [68]:
input_cropped, output_cropped = random_crop(input_raw, output_raw, args.crop_height, args.crop_width)
print(input_cropped.shape)
print(output_cropped.shape)

(256, 256, 3)
(256, 256, 3)


In [69]:
args.h_flip

False

In [70]:
random.randint(0,1)

0

In [71]:
args.brightness = 0.5
print(args.brightness)

0.5


In [72]:
random.uniform(-1*args.brightness, args.brightness)

0.15869883118469497

In [73]:
np.array([((i / 255.0) ** 0.5) * 255 for i in np.arange(0, 256)]).astype(np.uint8)

array([  0,  15,  22,  27,  31,  35,  39,  42,  45,  47,  50,  52,  55,
        57,  59,  61,  63,  65,  67,  69,  71,  73,  74,  76,  78,  79,
        81,  82,  84,  85,  87,  88,  90,  91,  93,  94,  95,  97,  98,
        99, 100, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 114,
       115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
       128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
       141, 141, 142, 143, 144, 145, 146, 147, 148, 148, 149, 150, 151,
       152, 153, 153, 154, 155, 156, 157, 158, 158, 159, 160, 161, 162,
       162, 163, 164, 165, 165, 166, 167, 168, 168, 169, 170, 171, 171,
       172, 173, 174, 174, 175, 176, 177, 177, 178, 179, 179, 180, 181,
       182, 182, 183, 184, 184, 185, 186, 186, 187, 188, 188, 189, 190,
       190, 191, 192, 192, 193, 194, 194, 195, 196, 196, 197, 198, 198,
       199, 200, 200, 201, 201, 202, 203, 203, 204, 205, 205, 206, 206,
       207, 208, 208, 209, 210, 210, 211, 211, 212, 213, 213, 21

In [74]:
0.5**0.5

0.7071067811865476

In [75]:
input_LUT = cv2.LUT(input_cropped, np.array([((i / 255.0) ** 0.5) * 255 for i in np.arange(0, 256)]).astype(np.uint8))
output_LUT = cv2.LUT(output_cropped, np.array([((i / 255.0) ** 0.5) * 255 for i in np.arange(0, 256)]).astype(np.uint8))

In [76]:
cv2.imshow('input_cropped',input_cropped)
cv2.imshow('input_LUT',input_LUT)
cv2.waitKey(-1)
cv2.destroyAllWindows()

The above test indicate that the definition of "factor = random.uniform(-1*args.brightness, args.brightness)" is problematic. We have to set "factor" in the range of \[0,+infinite\]. So the function is modified as:

In [77]:
def data_augmentation(input_image, output_image):
    # Data augmentation
    input_image, output_image = utils.random_crop(input_image, output_image, args.crop_height, args.crop_width)

    if args.h_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 1)
        output_image = cv2.flip(output_image, 1)
    if args.v_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 0)
        output_image = cv2.flip(output_image, 0)
    if args.brightness:
        factor = random.uniform(np.exp(-1*args.brightness), np.exp(args.brightness))
        table = np.array([((i / 255.0) ** factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8)
        input_image = cv2.LUT(input_image, table)
    if args.rotation:
        angle = random.uniform(-1*args.rotation, args.rotation)
        M = cv2.getRotationMatrix2D((input_image.shape[1]//2, input_image.shape[0]//2), angle, 1.0)
        input_image = cv2.warpAffine(input_image, M, (input_image.shape[1], input_image.shape[0]), flags=INTER_NEAREST)
        output_image = cv2.warpAffine(output_image, M, (output_image.shape[1], output_image.shape[0]), flags=INTER_NEAREST)

    return input_image, output_image

In [78]:
np.exp(0.5)

1.6487212707001282

In [79]:
np.exp(-0.5)

0.6065306597126334

In [80]:
args.brightness

0.5

In [81]:
random.uniform(np.exp(-1*args.brightness), np.exp(args.brightness))

1.271366606701244

In [82]:
args.rotation = 45
print(args.rotation)

45


In [83]:
random.uniform(-1*args.rotation, args.rotation)

9.74625954458827

In [84]:
cv2.getRotationMatrix2D((input_LUT.shape[1]//2, input_LUT.shape[0]//2), 45, 1.0) # rotation center.x;center.y;degree;scale

array([[  0.70710678,   0.70710678, -53.01933598],
       [ -0.70710678,   0.70710678, 128.        ]])

In [85]:
np.cos(0.5)

0.8775825618903728

In [86]:
input_LUT.shape

(256, 256, 3)

In [87]:
input_rotated = cv2.warpAffine(input_LUT, 
                               cv2.getRotationMatrix2D((input_LUT.shape[1]//2, input_LUT.shape[0]//2), 45, 1.0), 
                               (input_LUT.shape[1], input_LUT.shape[0]), flags=INTER_NEAREST)

NameError: name 'INTER_NEAREST' is not defined

See, this code has error. I have to modify this as follows:

In [88]:
input_rotated = cv2.warpAffine(input_LUT, 
                               cv2.getRotationMatrix2D((input_LUT.shape[1]//2, input_LUT.shape[0]//2), 45, 1.0), 
                               (input_LUT.shape[1], input_LUT.shape[0]), flags=cv2.INTER_NEAREST)

In [89]:
cv2.imshow('input_LUT',input_LUT)
cv2.imshow('input_rotated',input_rotated)
cv2.waitKey(-1)
cv2.destroyAllWindows()

The final version of the function should be:

In [90]:
def data_augmentation(input_image, output_image):
    # Data augmentation
    input_image, output_image = random_crop(input_image, output_image, args.crop_height, args.crop_width)

    if args.h_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 1)
        output_image = cv2.flip(output_image, 1)
    if args.v_flip and random.randint(0,1):
        input_image = cv2.flip(input_image, 0)
        output_image = cv2.flip(output_image, 0)
    if args.brightness:
        factor = random.uniform(np.exp(-1*args.brightness), np.exp(args.brightness))
        table = np.array([((i / 255.0) ** factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8)
        input_image = cv2.LUT(input_image, table)
    if args.rotation:
        angle = random.uniform(-1*args.rotation, args.rotation)
        M = cv2.getRotationMatrix2D((input_image.shape[1]//2, input_image.shape[0]//2), angle, 1.0)
        input_image = cv2.warpAffine(input_image, M, (input_image.shape[1], input_image.shape[0]), flags=cv2.INTER_NEAREST)
        output_image = cv2.warpAffine(output_image, M, (output_image.shape[1], output_image.shape[0]), flags=cv2.INTER_NEAREST)

    return input_image, output_image

### 3.6 download_checkpoints(model_name)

In [91]:
def download_checkpoints(model_name):
    subprocess.check_output(["python", "get_pretrained_checkpoints.py", "--model=" + model_name])

In [92]:
subprocess.check_output("dir", shell=True, universal_newlines=True) 

' 驱动器 E 中的卷是 Program and Projects\n 卷的序列号是 0754-B8FD\n\n E:\\Python Workspace\\Semantic-Segmentation-Suite 的目录\n\n2018/05/04  21:11    <DIR>          .\n2018/05/04  21:11    <DIR>          ..\n2018/04/17  17:04               160 .gitignore\n2018/04/20  14:20    <DIR>          .ipynb_checkpoints\n2018/04/17  17:04    <DIR>          CamVid\n2018/04/20  14:01    <DIR>          checkpoints\n2018/04/17  17:04             4,951 get_pretrained_checkpoints.py\n2018/04/20  11:42             4,318 helpers.py\n2018/04/17  17:04    <DIR>          Images\n2018/05/04  21:11            82,087 Learn_the_code.ipynb\n2018/04/20  14:51            24,447 main.py\n2018/04/27  09:16    <DIR>          models\n2018/04/17  17:04            12,358 README.md\n2018/04/17  17:04    <DIR>          Test\n2018/04/17  17:04             6,443 utils.py\n2018/04/20  11:42    <DIR>          __pycache__\n               7 个文件        134,764 字节\n               9 个目录 238,425,235,456 可用字节\n'

In [93]:
subprocess.run("dir", shell=True, universal_newlines=True, check=True, stdout=subprocess.PIPE).stdout

' 驱动器 E 中的卷是 Program and Projects\n 卷的序列号是 0754-B8FD\n\n E:\\Python Workspace\\Semantic-Segmentation-Suite 的目录\n\n2018/05/04  21:11    <DIR>          .\n2018/05/04  21:11    <DIR>          ..\n2018/04/17  17:04               160 .gitignore\n2018/04/20  14:20    <DIR>          .ipynb_checkpoints\n2018/04/17  17:04    <DIR>          CamVid\n2018/04/20  14:01    <DIR>          checkpoints\n2018/04/17  17:04             4,951 get_pretrained_checkpoints.py\n2018/04/20  11:42             4,318 helpers.py\n2018/04/17  17:04    <DIR>          Images\n2018/05/04  21:11            82,087 Learn_the_code.ipynb\n2018/04/20  14:51            24,447 main.py\n2018/04/27  09:16    <DIR>          models\n2018/04/17  17:04            12,358 README.md\n2018/04/17  17:04    <DIR>          Test\n2018/04/17  17:04             6,443 utils.py\n2018/04/20  11:42    <DIR>          __pycache__\n               7 个文件        134,764 字节\n               9 个目录 238,425,235,456 可用字节\n'

In [94]:
download_checkpoints("Res101")

CalledProcessError: Command '['python', 'get_pretrained_checkpoints.py', '--model=Res101']' returned non-zero exit status 1

For Windows, one has to download and extract the file by oneself

### 3.7 get_label_info

In [95]:
# Get the names of the classes so we can record the evaluation results
class_names_list, label_values = get_label_info(os.path.join(args.dataset, "class_dict.csv"))
class_names_string = ""
for class_name in class_names_list:
    if not class_name == class_names_list[-1]:
        class_names_string = class_names_string + class_name + ", "
    else:
        class_names_string = class_names_string + class_name

num_classes = len(label_values)

In [96]:
args.dataset

'CamVid'

In [97]:
print(class_names_string)
print(num_classes)

Animal, Archway, Bicyclist, Bridge, Building, Car, CartLuggagePram, Child, Column_Pole, Fence, LaneMkgsDriv, LaneMkgsNonDriv, Misc_Text, MotorcycleScooter, OtherMoving, ParkingBlock, Pedestrian, Road, RoadShoulder, Sidewalk, SignSymbol, Sky, SUVPickupTruck, TrafficCone, TrafficLight, Train, Tree, Truck_Bus, Tunnel, VegetationMisc, Void, Wall
32


### 3.8 Prepare the model

In [98]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess=tf.Session(config=config)

In [99]:
# Get the selected model. 
# Some of they require pre-trained ResNet
print("Preparing the model ...")
input = tf.placeholder(tf.float32,shape=[None,None,None,3])
output = tf.placeholder(tf.float32,shape=[None,None,None,num_classes]) 

Preparing the model ...


#### 3.8.1 PSPNet.py

In [100]:
import tensorflow as tf
from tensorflow.contrib import slim
import numpy as np
import resnet_v2
import os, sys

In [101]:
def Upsampling(inputs,feature_map_shape):
    return tf.image.resize_bilinear(inputs, size=feature_map_shape)

In [102]:
def ConvUpscaleBlock(inputs, n_filters, kernel_size=[3, 3], scale=2):
    """
    Basic conv transpose block for Encoder-Decoder upsampling
    Apply successivly Transposed Convolution, BatchNormalization, ReLU nonlinearity
    """
    net = slim.conv2d_transpose(inputs, n_filters, kernel_size=[3, 3], stride=[2, 2], activation_fn=None)
    net = tf.nn.relu(slim.batch_norm(net, fused=True))
    return net

In [103]:
def ConvBlock(inputs, n_filters, kernel_size=[3, 3]):
    """
    Basic conv block for Encoder-Decoder
    Apply successivly Convolution, BatchNormalization, ReLU nonlinearity
    """
    net = slim.conv2d(inputs, n_filters, kernel_size, activation_fn=None, normalizer_fn=None)
    net = tf.nn.relu(slim.batch_norm(net, fused=True))
    return net

In [104]:
def InterpBlock(net, level, feature_map_shape, pooling_type):
    
    # Compute the kernel and stride sizes according to how large the final feature map will be
    # When the kernel size and strides are equal, then we can compute the final feature map size
    # by simply dividing the current size by the kernel or stride size
    # The final feature map sizes are 1x1, 2x2, 3x3, and 6x6. We round to the closest integer
    kernel_size = [int(np.round(float(feature_map_shape[0]) / float(level))), int(np.round(float(feature_map_shape[1]) / float(level)))]
    stride_size = kernel_size

    net = slim.pool(net, kernel_size, stride=stride_size, pooling_type='MAX')
    net = slim.conv2d(net, 512, [1, 1], activation_fn=None)
    net = slim.batch_norm(net, fused=True)
    net = tf.nn.relu(net)
    net = Upsampling(net, feature_map_shape)
    return net

In [105]:
def PyramidPoolingModule(inputs, feature_map_shape, pooling_type):
    """
    Build the Pyramid Pooling Module.
    """

    interp_block1 = InterpBlock(inputs, 1, feature_map_shape, pooling_type)
    interp_block2 = InterpBlock(inputs, 2, feature_map_shape, pooling_type)
    interp_block3 = InterpBlock(inputs, 3, feature_map_shape, pooling_type)
    interp_block6 = InterpBlock(inputs, 6, feature_map_shape, pooling_type)

    res = tf.concat([inputs, interp_block6, interp_block3, interp_block2, interp_block1], axis=-1)
    return res

In [106]:
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet-Res50', pooling_type = "MAX",
    weight_decay=1e-5, upscaling_method="conv", is_training=True, pretrained_dir="models"):
    """
    Builds the PSPNet model. 

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for proper upscaling 
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes
      pooling_type: Max or Average pooling

    Returns:
      PSPNet model
    """

    if preset_model == 'PSPNet-Res50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_50(inputs, is_training=is_training, scope='resnet_v2_50')
            resnet_scope='resnet_v2_50'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'), slim.get_model_variables('resnet_v2_50'))
    elif preset_model == 'PSPNet-Res101':
        with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_101(inputs, is_training=is_training, scope='resnet_v2_101')
            resnet_scope='resnet_v2_101'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'), slim.get_model_variables('resnet_v2_101'))
    elif preset_model == 'PSPNet-Res152':
        with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_152(inputs, is_training=is_training, scope='resnet_v2_152')
            resnet_scope='resnet_v2_152'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'), slim.get_model_variables('resnet_v2_152'))
    else:
        raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model))

    feature_map_shape = [int(x / 8.0) for x in label_size]
    print(feature_map_shape)
    psp = PyramidPoolingModule(end_points['pool3'], feature_map_shape=feature_map_shape, pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net, fused=True)
    net = tf.nn.relu(net)

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, label_size)
    
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn

```
# Redundent Redefinition
def mean_image_subtraction(inputs, means=[123.68, 116.78, 103.94]):
    inputs=tf.to_float(inputs)
    num_channels = inputs.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')
    channels = tf.split(axis=3, num_or_size_splits=num_channels, value=inputs)
    for i in range(num_channels):
        channels[i] -= means[i]
    return tf.concat(axis=3, values=channels)
```

### 3.8.2 build_pspnet

In [112]:
args.model = "PSPNet-Res50"

In [114]:
    # Image size is required for PSPNet
    # PSPNet requires pre-trained ResNet weights
    network, init_fn = build_pspnet(input, label_size=[args.crop_height, args.crop_width], preset_model = args.model, num_classes=num_classes)

[32, 32]


In [113]:
print(args.crop_height)
print(args.crop_width)
print(args.model)
print(num_classes)

256
256
PSPNet-Res50
32


In [115]:
print(network)
print(init_fn)

Tensor("logits/BiasAdd:0", shape=(?, 256, 256, 32), dtype=float32)
<function assign_from_checkpoint_fn.<locals>.callback at 0x000001B9C84E0D90>


### 3.8.3 Compute your softmax cross entropy loss

In [117]:
# Compute your softmax cross entropy loss
loss = None
if args.class_balancing:
    print("Computing class weights for", args.dataset, "...")
    class_weights = utils.compute_class_weights(labels_dir=args.dataset + "/train_labels", label_values=label_values)
    unweighted_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=output))
    loss = tf.reduce_mean(unweighted_loss * class_weights)
else:
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=output))

In [116]:
print(args.class_balancing)

False


### 3.8.4 others

In [118]:
opt = tf.train.AdamOptimizer(0.0001).minimize(loss, var_list=[var for var in tf.trainable_variables()])

saver=tf.train.Saver(max_to_keep=1000)

## 3.9 initialize vars

In [120]:
sess.run(tf.global_variables_initializer())

count_params()

This model has 36991904 trainable parameters


## 3.10 Load weights

In [121]:
# If a pre-trained ResNet is required, load the weights.
# This must be done AFTER the variables are initialized with sess.run(tf.global_variables_initializer())
if init_fn is not None:
    init_fn(sess)

INFO:tensorflow:Restoring parameters from models\resnet_v2_50.ckpt


In [122]:
print(args.mode)

train


## 3.11 Training

In [123]:
    print("\n***** Begin training *****")
    print("Dataset -->", args.dataset)
    print("Model -->", args.model)
    print("Crop Height -->", args.crop_height)
    print("Crop Width -->", args.crop_width)
    print("Num Epochs -->", args.num_epochs)
    print("Batch Size -->", args.batch_size)
    print("Num Classes -->", num_classes)

    print("Data Augmentation:")
    print("\tVertical Flip -->", args.v_flip)
    print("\tHorizontal Flip -->", args.h_flip)
    print("\tBrightness Alteration -->", args.brightness)
    print("\tRotation -->", args.rotation)
    print("")


***** Begin training *****
Dataset --> CamVid
Model --> PSPNet-Res50
Crop Height --> 256
Crop Width --> 256
Num Epochs --> 1
Batch Size --> 1
Num Classes --> 32
Data Augmentation:
	Vertical Flip --> False
	Horizontal Flip --> False
	Brightness Alteration --> 0.5
	Rotation --> 45



In [124]:
args.num_val_images

10

In [127]:
args.batch_size

1

In [131]:
avg_loss_per_epoch = []

# Which validation images do we want
val_indices = []
num_vals = min(args.num_val_images, len(val_input_names))

# Set random seed to make sure models are validated on the same validation images.
# So you can compare the results of different models more intuitively.
random.seed(16)
val_indices=random.sample(range(0,len(val_input_names)),num_vals)

# Do the training here
for epoch in range(0, args.num_epochs):

    current_losses = []

    cnt=0

    # Equivalent to shuffling
    id_list = np.random.permutation(len(train_input_names))

    num_iters = int(np.floor(len(id_list) / args.batch_size))
    st = time.time()
    epoch_st=time.time()
    for i in range(num_iters):
        # st=time.time()

        input_image_batch = []
        output_image_batch = [] 

        # Collect a batch of images
        for j in range(args.batch_size):
            index = i*args.batch_size + j
            id = id_list[index]
            input_image = load_image(train_input_names[id])
            output_image = load_image(train_output_names[id])

            # with tf.device('/cpu:0'):
            input_image, output_image = data_augmentation(input_image, output_image)


            # Prep the data. Make sure the labels are in one-hot format
            input_image = np.float32(input_image) / 255.0
            output_image = np.float32(one_hot_it(label=output_image, label_values=label_values))

            input_image_batch.append(np.expand_dims(input_image, axis=0))
            output_image_batch.append(np.expand_dims(output_image, axis=0))
                

        # ***** THIS CAUSES A MEMORY LEAK AS NEW TENSORS KEEP GETTING CREATED *****
        # input_image = tf.image.crop_to_bounding_box(input_image, offset_height=0, offset_width=0, 
        #                                               target_height=args.crop_height, target_width=args.crop_width).eval(session=sess)
        # output_image = tf.image.crop_to_bounding_box(output_image, offset_height=0, offset_width=0, 
        #                                               target_height=args.crop_height, target_width=args.crop_width).eval(session=sess)
        # ***** THIS CAUSES A MEMORY LEAK AS NEW TENSORS KEEP GETTING CREATED *****

        # memory()

        if args.batch_size == 1:
            input_image_batch = input_image_batch[0]
            print(input_image_batch.shape)
            output_image_batch = output_image_batch[0]
            print(output_image_batch.shape)
        else:
            input_image_batch = np.squeeze(np.stack(input_image_batch, axis=1))
            output_image_batch = np.squeeze(np.stack(output_image_batch, axis=1))

        # Do the training
        _,current=sess.run([opt,loss],feed_dict={input:input_image_batch,output:output_image_batch})
        current_losses.append(current)
        cnt = cnt + args.batch_size
        if cnt % 20 == 0:
            string_print = "Epoch = %d Count = %d Current_Loss = %.4f Time = %.2f"%(epoch,cnt,current,time.time()-st)
            LOG(string_print)
            st = time.time()

    mean_loss = np.mean(current_losses)
    avg_loss_per_epoch.append(mean_loss)

    # Create directories if needed
    if not os.path.isdir("%s/%04d"%("checkpoints",epoch)):
        os.makedirs("%s/%04d"%("checkpoints",epoch))

    # The following code has problem since model_checkpoint_name is not defined!!!
    # saver.save(sess,model_checkpoint_name)

    if val_indices != 0:
        saver.save(sess,"%s/%04d/model.ckpt"%("checkpoints",epoch))


    target=open("%s/%04d/val_scores.csv"%("checkpoints",epoch),'w')
    target.write("val_name, avg_accuracy, precision, recall, f1 score, mean iou, %s\n" % (class_names_string))


    scores_list = []
    class_scores_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    iou_list = []


    # Do the validation on a small set of validation images
    for ind in val_indices:

        input_image = np.expand_dims(np.float32(load_image(val_input_names[ind])[:args.crop_height, :args.crop_width]),axis=0)/255.0
        gt = load_image(val_output_names[ind])[:args.crop_height, :args.crop_width]
        gt = helpers.reverse_one_hot(helpers.one_hot_it(gt, label_values))

        # st = time.time()

        output_image = sess.run(network,feed_dict={input:input_image})


        output_image = np.array(output_image[0,:,:,:])
        output_image = helpers.reverse_one_hot(output_image)
        out_vis_image = helpers.colour_code_segmentation(output_image, label_values)

        accuracy, class_accuracies, prec, rec, f1, iou = utils.evaluate_segmentation(pred=output_image, label=gt, num_classes=num_classes)

        file_name = utils.filepath_to_name(val_input_names[ind])
        target.write("%s, %f, %f, %f, %f, %f"%(file_name, accuracy, prec, rec, f1, iou))
        for item in class_accuracies:
            target.write(", %f"%(item))
        target.write("\n")

        scores_list.append(accuracy)
        class_scores_list.append(class_accuracies)
        precision_list.append(prec)
        recall_list.append(rec)
        f1_list.append(f1)
        iou_list.append(iou)

        gt = helpers.colour_code_segmentation(gt, label_values)

        file_name = os.path.basename(val_input_names[ind])
        file_name = os.path.splitext(file_name)[0]
        cv2.imwrite("%s/%04d/%s_pred.png"%("checkpoints",epoch, file_name),cv2.cvtColor(np.uint8(out_vis_image), cv2.COLOR_RGB2BGR))
        cv2.imwrite("%s/%04d/%s_gt.png"%("checkpoints",epoch, file_name),cv2.cvtColor(np.uint8(gt), cv2.COLOR_RGB2BGR))


    target.close()

    avg_score = np.mean(scores_list)
    class_avg_scores = np.mean(class_scores_list, axis=0)
    avg_scores_per_epoch.append(avg_score)
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)
    avg_f1 = np.mean(f1_list)
    avg_iou = np.mean(iou_list)

    print("\nAverage validation accuracy for epoch # %04d = %f"% (epoch, avg_score))
    print("Average per class validation accuracies for epoch # %04d:"% (epoch))
    for index, item in enumerate(class_avg_scores):
        print("%s = %f" % (class_names_list[index], item))
    print("Validation precision = ", avg_precision)
    print("Validation recall = ", avg_recall)
    print("Validation F1 score = ", avg_f1)
    print("Validation IoU score = ", avg_iou)

    epoch_time=time.time()-epoch_st
    remain_time=epoch_time*(args.num_epochs-1-epoch)
    m, s = divmod(remain_time, 60)
    h, m = divmod(m, 60)
    if s!=0:
        train_time="Remaining training time = %d hours %d minutes %d seconds\n"%(h,m,s)
    else:
        train_time="Remaining training time : Training completed.\n"
    utils.LOG(train_time)
    scores_list = []

fig = plt.figure(figsize=(11,8))
ax1 = fig.add_subplot(111)


ax1.plot(range(args.num_epochs), avg_scores_per_epoch)
ax1.set_title("Average validation accuracy vs epochs")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Avg. val. accuracy")


plt.savefig('accuracy_vs_epochs.png')

plt.clf()

ax1 = fig.add_subplot(111)


ax1.plot(range(args.num_epochs), avg_loss_per_epoch)
ax1.set_title("Average loss vs epochs")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Current loss")

plt.savefig('loss_vs_epochs.png')

(1, 256, 256, 3)
(1, 256, 256, 3, 32)


ValueError: Cannot feed value of shape (1, 256, 256, 3, 32) for Tensor 'Placeholder_1:0', which has shape '(?, ?, ?, 32)'