# Hands-On Deep Learning with Swift
**By:** Joshua Newnham (Author)  
**Publisher:** [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-core-ml)

## Chapter 05 - Sketch Classifier
In this notebook we build, train and export the weights of a convolutional neural network for the purposes of sketch recognition. 

We use the dataset from the paper <a href='http://cybertron.cg.tu-berlin.de/eitz/projects/classifysketch/'>How Do Humans Sketch Objects?</a> where the authors collected 20,000 unique sketches evenly distributed over 250 object categories - we will use a CNN (using Keras) to classify a sketch. 

<img src='images/sketch_examples.jpg' />

We use this notebook (and Keras) to rapidly prototype networks to come up with something we can transfer to MPS; we constrain ourselves to the barriers/obstacles faced within MPS (such as being constrained to a small batch size) to find a model that will *satisfactorily* classify hand-drawn sketches (we define *satisfactorily* as a classifier achieving approx. 60% accuracy on the validation set).

For the purposes of debugging the network in MPS we export the weights and compare classification results between Keras and MPS (sanity check). 

In [2]:
import numpy as np 
import tensorflow as tf
import os 

## Define constants and helper functions 

In [28]:
# Directory holding the preprocessed sketches (the output of running
# preprocess_sketch_images.py on the original dataset).
# NOTE: this is an absolute, machine-specific path; change it to match your setup.
ROOT_DIR = '/Users/joshua.newnham/Documents/Shared Playground Data/Sketches/preprocessed/'
# Subdirectory of ROOT_DIR containing the validation images
VALID_DIR = os.path.join(ROOT_DIR, "valid")
# Subdirectory of ROOT_DIR containing the training images
TRAIN_DIR = os.path.join(ROOT_DIR, "train")
# File the ModelCheckpoint callback in train() writes the best weights to
WEIGHTS_FILE = "sketch_classifier.h5"

# Size every image is resized to when loaded by the data generators
TARGET_SIZE = (128,128)
# Input shape of the network: TARGET_SIZE plus a single (grayscale) channel
INPUT_SHAPE = (128,128,1)
# Number of classes we are classifying (size of the final Dense layer)
NUM_CLASSES = 22

In [30]:
def count_files(full_path):
    """
    Count the image files held in the immediate subdirectories of a directory.

    The expected layout is one subdirectory per class
    (``full_path/<class>/<image>``). Only that single level is scanned:
    files sitting directly in ``full_path`` and files nested deeper than
    one subdirectory are not counted.

    Args:
        full_path: Directory whose subdirectories are scanned.

    Returns:
        The number of regular files with a .png, .jpg or .jpeg extension
        (case-insensitive) found across the subdirectories.

    Raises:
        OSError: If ``full_path`` (or one of its subdirectories) cannot be listed.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    def is_image(file_name):
        # Compare the actual extension of the file name. A substring search
        # over the whole path would also match names such as "png_notes.txt"
        # and every file below a directory whose path merely contains "png".
        return os.path.splitext(file_name)[1].lower() in image_extensions

    count = 0

    for d in os.listdir(full_path):
        sub_full_path = os.path.join(full_path, d)
        if not os.path.isdir(sub_full_path):
            continue

        for f in os.listdir(sub_full_path):
            if is_image(f) and os.path.isfile(os.path.join(sub_full_path, f)):
                count += 1

    return count

In [5]:
# Image counts for each split; train() divides these by BATCH_SIZE to derive
# steps_per_epoch and validation_steps.
NUM_TRAIN_FILES = count_files(TRAIN_DIR)
NUM_VALID_FILES = count_files(VALID_DIR)

In [31]:
def train(model):
    """
    Fit ``model`` on the images under TRAIN_DIR, validating on VALID_DIR.

    Both splits are streamed from disk as grayscale images resized to
    TARGET_SIZE, with pixel values rescaled by 1/255. Two callbacks are
    attached, both monitoring ``val_loss``:

    - a checkpoint that writes the best weights (weights only) to
      WEIGHTS_FILE, checked every 2 epochs;
    - early stopping with a patience of 5 epochs.

    Relies on the module-level BATCH_SIZE, EPOCHS, NUM_TRAIN_FILES and
    NUM_VALID_FILES being defined before it is called.

    Returns a ``(history, model)`` tuple: the training history and the
    model that was passed in.
    """
    rescaler = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)

    # options shared by the training and validation iterators
    flow_options = dict(
        target_size=TARGET_SIZE,
        batch_size=BATCH_SIZE,
        color_mode='grayscale')

    # iterator over the training data, then over the validation data
    training_batches = rescaler.flow_from_directory(TRAIN_DIR, **flow_options)
    validation_batches = rescaler.flow_from_directory(VALID_DIR, **flow_options)

    training_callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            WEIGHTS_FILE,
            monitor='val_loss',
            verbose=0,
            save_best_only=True,
            save_weights_only=True,
            mode='auto',
            period=2),
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),
    ]

    # number of whole batches per pass over each split
    train_steps = int(NUM_TRAIN_FILES/BATCH_SIZE)
    valid_steps = int(NUM_VALID_FILES/BATCH_SIZE)

    history = model.fit_generator(
        training_batches,
        steps_per_epoch=train_steps,
        epochs=EPOCHS,
        validation_data=validation_batches,
        validation_steps=valid_steps,
        callbacks=training_callbacks)

    return history, model

In [32]:
def validate_model(model):
    """
    Evaluate ``model`` on the images under VALID_DIR.

    The images are loaded as grayscale, resized to TARGET_SIZE, rescaled
    by 1/255 and served in batches of BATCH_SIZE.

    Returns whatever ``model.evaluate_generator`` reports for that data.
    """
    rescaler = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)

    # iterator over the validation data
    validation_batches = rescaler.flow_from_directory(
        VALID_DIR,
        target_size=TARGET_SIZE,
        batch_size=BATCH_SIZE,
        color_mode='grayscale')

    return model.evaluate_generator(validation_batches)

---

## Create and train model 

In [33]:
# Hyperparameters 

# Image size, network input shape and class count (same values as the
# constants defined near the top of the notebook).
TARGET_SIZE = (128,128)
INPUT_SHAPE = (128,128,1)
NUM_CLASSES = 22

# Upper bound on the number of epochs; train() also installs an
# EarlyStopping callback, so training can stop well before this.
EPOCHS = 1000 
# Batch size used by the training and validation generators
BATCH_SIZE = 4

In [9]:
def create_model():
    """
    Build and compile the sketch classifier.

    Architecture (all convolutions have 32 filters, 'same' padding and no
    fused activation; ReLU is applied as a separate layer):

    - l1: 7x7 convolution with stride 2, ReLU, dropout
    - l2: 5x5 convolution, 2x2 max-pool, ReLU
    - l3: 5x5 convolution, 2x2 max-pool, ReLU, dropout
    - l4: 5x5 convolution, ReLU, 2x2 max-pool, dropout
    - flatten
    - l5: dense with 64 units, ReLU, dropout
    - l6: dense with NUM_CLASSES units, softmax (layer named "output")

    Every dropout layer uses a rate of 0.3. The model is compiled with
    categorical cross-entropy, SGD (lr=0.01) and the accuracy metric.

    Returns the compiled ``tf.keras`` Sequential model.
    """
    layers = tf.keras.layers

    def conv5x5(layer_name):
        # 5x5, stride-1 convolution shared by l2, l3 and l4
        return layers.Conv2D(32,
                             kernel_size=(5,5),
                             strides=(1,1),
                             padding='same',
                             activation=None,
                             name=layer_name)

    model = tf.keras.models.Sequential()

    # The layers are created and added strictly in this order.
    stack = [
        layers.Conv2D(32,
                      kernel_size=(7,7),
                      strides=(2,2),
                      padding='same',
                      activation=None,
                      input_shape=INPUT_SHAPE,
                      name='l1'),
        layers.Activation('relu'),
        layers.Dropout(0.3),

        conv5x5('l2'),
        layers.MaxPool2D(2,2),
        layers.Activation('relu'),

        conv5x5('l3'),
        layers.MaxPool2D(2,2),
        layers.Activation('relu'),
        layers.Dropout(0.3),

        conv5x5('l4'),
        layers.Activation('relu'),
        layers.MaxPool2D(2,2),
        layers.Dropout(0.3),

        layers.Flatten(),

        layers.Dense(64, activation=None, name='l5'),
        layers.Activation('relu'),
        layers.Dropout(0.3),

        layers.Dense(NUM_CLASSES, activation=None, name='l6'),
        layers.Activation('softmax', name="output"),
    ]

    for layer in stack:
        model.add(layer)

    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.keras.optimizers.SGD(lr=0.01),
        metrics=['accuracy'])

    return model

In [10]:
# Build the compiled network and print its layer-by-layer summary.
model = create_model() 
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
l1 (Conv2D)                  (None, 64, 64, 32)        1600      
_________________________________________________________________
activation (Activation)      (None, 64, 64, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 64, 64, 32)        0         
_________________________________________________________________
l2 (Conv2D)                  (None, 64, 64, 32)        25632     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 32)        0         
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
l3 (Conv2D)                  (None, 32, 32, 32)        25632     
__________

In [11]:
# Train the network; train() returns the training history together with the model.
history, model = train(model)

Found 2844 images belonging to 22 classes.
Found 176 images belonging to 22 classes.
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000


In [12]:
# validate_model() evaluates on the images under VALID_DIR, so the figures
# printed below are validation metrics even though the labels read "Test".
score = validate_model(model)
# score[0] is the loss; score[1] is the accuracy metric set in model.compile()
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Found 176 images belonging to 22 classes.
('Test loss:', 1.2765541143122723)
('Test accuracy:', 0.6363636363636364)


--- 

## Save weights 

In [13]:
import json 
from keras.models import model_from_json

# Write the model architecture to sketch_cnn.json: model.to_json() returns a
# JSON string, which is parsed and re-serialised with json.dump.
with open('sketch_cnn.json', 'w') as f:
    json_obj = json.loads(model.to_json())
    json.dump(json_obj, f)

# Write the weights of the trained model to sketch_cnn.h5
model.save_weights('sketch_cnn.h5')

Using TensorFlow backend.


--- 

## Export Weights 

The type of each entry in array is given by -dataType. The number of entries is equal to:

inputFeatureChannels * outputFeatureChannels * kernelHeight * kernelWidth

The layout of filter weight is as a 4D tensor (array) weight[ outputChannels ][ kernelHeight ][ kernelWidth ][ inputChannels / groups ]

Note: For binary-convolutions the layout of the weights are: weight[ outputChannels ][ kernelHeight ][ kernelWidth ][ floor((inputChannels/groups)+31) / 32 ] with each 32 sub input feature channel index specified in machine byte order, so that for example the 13th feature channel bit can be extracted using bitmask = (1U << 13).

In [14]:
def export_conv_weights(name, wts_coef, bias_coef):
    """
    Write the weights (and optional bias) of a convolution layer as raw binary.

    The kernel is reordered from the Keras layout
    [kernel_height, kernel_width, input_channels, output_channels] to
    [output_channels, kernel_height, kernel_width, input_channels] and written
    to ``exports/<name>_conv.data``. The bias, when given, is written to
    ``exports/<name>_bias.data``. Values are written with the dtype the
    arrays already have; no conversion is performed. The ``exports``
    directory is created if it does not exist yet.

    Args:
        name: Layer name, used as the prefix of the output file names.
        wts_coef: 4D numpy array holding the convolution kernel.
        bias_coef: 1D numpy array holding the bias, or None to skip it.

    Returns:
        None. Progress information is printed to stdout.
    """
    export_dir = 'exports'
    wts_path = os.path.join(export_dir, "{}_conv.data".format(name))
    bias_path = os.path.join(export_dir, "{}_bias.data".format(name))

    # ndarray.tofile does not create missing directories, so make sure the
    # target exists (isdir check keeps this working on Python 2 as well)
    if not os.path.isdir(export_dir):
        os.makedirs(export_dir)

    print("Exporting weights for {}\n\t{}\n\t{}".format(name, wts_path, bias_path))

    print("\n")

    # [kernel_height, kernel_width, input_feature_channels, output_feature_channels]
    print("\tOriginal weights shape {}".format(wts_coef.shape))
    if bias_coef is not None:
        # [output_feature_channels]
        print("\tOriginal bias shape {}".format(bias_coef.shape))

    # [output_feature_channels, kernel_height, kernel_width, input_feature_channels]
    wts_coef = wts_coef.transpose(3, 0, 1, 2)
    print("\tReshaped weights shape {}".format(wts_coef.shape))
    # tofile writes in C order of the transposed view, i.e. output channel first
    wts_coef.tofile(wts_path)

    if bias_coef is not None:
        bias_coef = np.squeeze(bias_coef)
        print("\tReshaped bias_coef shape {}".format(bias_coef.shape))
        bias_coef.tofile(bias_path)

    print("\n")

In [15]:
def export_dense_weights(name, wts_coef, bias_coef, kernel_width, kernel_height):
    """
    Write the weights (and optional bias) of a dense layer as raw binary,
    laid out as the kernel of an MPS fully connected layer.

    A fully connected layer in a Convolutional Neural Network (CNN) is one
    where every input channel is connected to every output channel. The
    kernel width is equal to the width of the source image, and the kernel
    height is equal to the height of the source image. The width and height
    of the output is 1 x 1.

    A fully connected layer takes an MPSImage object with dimensions
    source.width x source.height x Ni, convolves it with
    Weights[No][source.width][source.height][Ni], and produces a
    1 x 1 x No output.

    Thus, the following conditions must be true:
    - kernelWidth == source.width
    - kernelHeight == source.height
    - clipRect.size.width == 1
    - clipRect.size.height == 1

    You can think of a fully connected layer as a matrix multiplication where
    the image is flattened into a vector of length
    source.width*source.height*Ni, and the weights are arranged in a matrix of
    dimension No x (source.width*source.height*Ni) to produce an output vector
    of length No.

    The value of the strideInPixelsX, strideInPixelsY, and groups properties
    must be 1. The offset property is not applicable and it is ignored.
    Because the clip rectangle is clamped to the destination image bounds, if
    the destination is 1 x 1, you do not need to set the clipRect property.

    The 2D dense matrix [inputs, outputs] is reshaped to
    [kernel_width, kernel_height, input_channels, outputs], with the channel
    count inferred, and then reordered so the output axis comes first. The
    result is written to ``exports/<name>_conv.data`` and the bias, when
    given, to ``exports/<name>_bias.data``. The ``exports`` directory is
    created if it does not exist yet.

    NOTE(review): the first two reshape axes are taken in the order
    (kernel_width, kernel_height); for non-square inputs confirm that this
    order matches the order of the spatial axes in the flattened tensor.

    Args:
        name: Layer name, used as the prefix of the output file names.
        wts_coef: 2D numpy array of shape [inputs, outputs].
        bias_coef: 1D numpy array holding the bias, or None to skip it.
        kernel_width: Size of the first spatial axis of the layer's input.
        kernel_height: Size of the second spatial axis of the layer's input.

    Returns:
        None. Progress information is printed to stdout.

    Raises:
        ValueError: If the number of inputs is not a multiple of
            kernel_width * kernel_height (raised by the reshape).
    """
    export_dir = 'exports'
    wts_path = os.path.join(export_dir, "{}_conv.data".format(name))
    bias_path = os.path.join(export_dir, "{}_bias.data".format(name))

    # ndarray.tofile does not create missing directories, so make sure the
    # target exists (isdir check keeps this working on Python 2 as well)
    if not os.path.isdir(export_dir):
        os.makedirs(export_dir)

    print("Exporting weights for {}\n\t{}\n\t{}".format(name, wts_path, bias_path))

    output_feature_channels = wts_coef.shape[-1]

    # [kernel_width * kernel_height * input_feature_channels, output_feature_channels]
    print("\tOriginal weights shape {}".format(wts_coef.shape))

    # Recover the spatial axes; -1 lets numpy infer the input channel count
    wts_coef = np.reshape(wts_coef, [kernel_width, kernel_height, -1, output_feature_channels])

    if bias_coef is not None:
        # [output_feature_channels]
        print("\tOriginal bias shape {}".format(bias_coef.shape))

    # [output_feature_channels, kernel_width, kernel_height, input_feature_channels]
    wts_coef = wts_coef.transpose(3, 0, 1, 2)
    print("\tReshaped weights shape {}".format(wts_coef.shape))
    # tofile writes in C order of the transposed view, i.e. output channel first
    wts_coef.tofile(wts_path)

    if bias_coef is not None:
        bias_coef = np.squeeze(bias_coef)
        print("\tReshaped bias_coef shape {}".format(bias_coef.shape))
        bias_coef.tofile(bias_path)

    print("\n")

In [16]:
# Spatial size of the tensor entering the Flatten layer. Both values are
# initialised here so they are always defined, even if no layer with
# "flatten" in its name precedes the dense layers.
flatted_input_kernel_width = None
flatted_input_kernel_height = None

for layer in model.layers:
    if "flatten" in layer.name:
        # axes 1 and 2 of the Flatten layer's input shape (axis 0 is the batch)
        flatted_input_kernel_width = layer.input_shape[1]
        flatted_input_kernel_height = layer.input_shape[2]

    # fetch the weights once per layer; layers without weights are skipped
    wts = layer.get_weights()
    if len(wts) == 0:
        continue

    name = layer.name
    # the second entry, when present, is the bias
    bias = wts[1] if len(wts) == 2 else None

    if name in ['l1', 'l2', 'l3', 'l4']:
        export_conv_weights(name, wts[0], bias)
    elif name in ['l5', 'l6']:
        export_dense_weights(name, wts[0], bias,
                             flatted_input_kernel_width, flatted_input_kernel_height)
        # after the first dense layer (the step from CNN to fully connected)
        # the input is 1x1, so every later dense layer is exported with a
        # kernel width and height of 1
        flatted_input_kernel_width, flatted_input_kernel_height = 1, 1

Exporting weights for l1
	exports/l1_conv.data
	exports/l1_bias.data


	Original weights shape (7, 7, 1, 32)
	Original bias shape (32,)
	Reshaped weights shape (32, 7, 7, 1)
	Reshaped bias_coef shape (32,)


Exporting weights for l2
	exports/l2_conv.data
	exports/l2_bias.data


	Original weights shape (5, 5, 32, 32)
	Original bias shape (32,)
	Reshaped weights shape (32, 5, 5, 32)
	Reshaped bias_coef shape (32,)


Exporting weights for l3
	exports/l3_conv.data
	exports/l3_bias.data


	Original weights shape (5, 5, 32, 32)
	Original bias shape (32,)
	Reshaped weights shape (32, 5, 5, 32)
	Reshaped bias_coef shape (32,)


Exporting weights for l4
	exports/l4_conv.data
	exports/l4_bias.data


	Original weights shape (5, 5, 32, 32)
	Original bias shape (32,)
	Reshaped weights shape (32, 5, 5, 32)
	Reshaped bias_coef shape (32,)


Exporting weights for l5
	exports/l5_conv.data
	exports/l5_bias.data
	Original weights shape (2048, 64)
	Original bias shape (64,)
	Reshaped weights shape (64, 8,