In [None]:
import os
import yaml
import warnings
import pickle

from cachetools import cached
from itertools import product

import numpy as np
import seaborn as sns

from tqdm.notebook import tqdm
from sklearn.metrics import f1_score
from PIL import Image
from matplotlib import pyplot as plt

from netconfig import NetConfig

# Print numpy arrays with 4 digits of precision.
np.set_printoptions(precision=4)
# Silence every Python warning for the rest of the notebook.
warnings.filterwarnings('ignore')
# Reduce TensorFlow's native log output. This is placed before the import
# below so the variable is already in the environment when TensorFlow loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
# Do not log which device each op is placed on.
tf.debugging.set_log_device_placement(False)
# Fixed global TensorFlow seed.
tf.random.set_seed(42)

print(f"Tensorflow version: {tf.__version__}")

# Training Configuration
Set `SELECTED_MODEL` to `None` to perform a parameter search, or set it
to a `NetConfig` object to train and evaluate that single configuration.

In [None]:
# YAML file whose top-level "labels" mapping is read into `label_map`.
LABELS_PATH="labels.yaml"
# Root directory walked for training images; each image is labelled by the
# name of the directory that contains it.
# NOTE(review): this is an absolute path while DISCARD_IMAGES_DIR is relative
# to the working directory — confirm that this is intended.
IMAGES_BASE="/workspaces/motion/images/esp32/good"
# Directory walked for the "discard" images used to check for confident
# predictions on images that should not be classified.
DISCARD_IMAGES_DIR="images/esp32/discard"
# Batch size used for both the training and the discard datasets.
BATCH_SIZE=10

# Uncomment the next line (and comment out the assignment below it) to run
# the parameter search instead of training a single model.
# SELECTED_MODEL=None
SELECTED_MODEL=NetConfig(conv_layers=4, conv_per_layer=6, conv_kernel_shape=(3, 3), dropout1=0.7, dropout2=0.5, dense_size=128)
# Current best:
# SELECTED_MODEL=NetConfig(conv_layers=4, conv_per_layer=3, conv_kernel_shape=(3, 3), dropout1=0.7, dropout2=0.5, dense_size=128)
# SELECTED_MODEL=NetConfig(conv_layers=4, conv_per_layer=3, conv_kernel_shape=(3, 3), dropout1=0.5, dropout2=0.7, dense_size=128)
# SELECTED_MODEL=NetConfig(conv_layers=2, conv_per_layer=6, conv_kernel_shape=(3, 3), dropout1=0.5, dropout2=0.7, dense_size=128)
# SELECTED_MODEL=NetConfig(conv_layers=4, conv_per_layer=2, conv_kernel_shape=(3, 3), dropout1=0.5, dropout2=0.7, dense_size=128)

# Load labels globally

In [None]:
# Read the "labels" mapping from LABELS_PATH and build its inverse so a
# label value can be turned back into its key.
with open(LABELS_PATH) as labels_file:
    label_map = yaml.safe_load(labels_file)["labels"]
reverse_label_map = dict((value, key) for key, value in label_map.items())

# Utility functions

In [None]:
def tf_init():
  """Enable memory growth on every visible GPU and print the device counts.

  Does nothing when TensorFlow reports no GPU. A RuntimeError raised while
  configuring the devices is printed instead of being propagated.
  """
  # Adapted from the TensorFlow GPU guide: https://www.tensorflow.org/guide/gpu
  physical_gpus = tf.config.list_physical_devices('GPU')
  if not physical_gpus:
    return

  try:
    # Memory growth has to be configured identically on all GPUs
    for device in physical_gpus:
      tf.config.experimental.set_memory_growth(device, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(physical_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as err:
    # Memory growth must be set before GPUs have been initialized
    print(err)

@cached(cache={})
def load_train_data():
    """Load the labelled images under IMAGES_BASE and split them into datasets.

    Every file found below IMAGES_BASE is opened, binarized (any non-zero
    pixel becomes 255), converted to float32 and reshaped to (29, 40, 1).
    Its label is ``label_map`` looked up with the name of the directory that
    contains the file, so a file in a directory without an entry in
    ``label_map`` raises ``KeyError``.

    The examples are shuffled once (``reshuffle_each_iteration=False`` keeps
    the order, and therefore the split, identical on every pass), batched by
    BATCH_SIZE and split by batch: the first 10 batches are validation, the
    next 10 are test and the remainder is training.

    The result is memoized by ``cached``, so all callers share the same split.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)`` of batched ``tf.data.Dataset``s.
    """
    examples = []
    examples_labels = []
    for dirpath, dirnames, filenames in os.walk(IMAGES_BASE):
        # os.walk yields entries in arbitrary filesystem order; sorting makes
        # the pre-shuffle order (and so the seeded split) reproducible.
        dirnames.sort()
        for filename in sorted(filenames):
            # The context manager releases the file handle once the pixels
            # have been copied out.
            with Image.open(os.path.join(dirpath, filename)) as img:
                binarized = img.point(lambda x: 255 if x > 0 else 0)
                examples.append(np.asarray(binarized, dtype=np.float32).reshape(29, 40, 1))
            examples_labels.append(label_map[os.path.basename(dirpath)])
    examples_numpy = np.asarray(examples)
    examples_labels_numpy = np.asanyarray(examples_labels)

    ds = tf.data.Dataset.from_tensor_slices((examples_numpy, examples_labels_numpy))
    # The examples arrive grouped by class directory, so the shuffle buffer
    # must cover the whole dataset; a smaller buffer only mixes locally and
    # would leave the leading validation/test batches dominated by the first
    # classes. The floor of 1 keeps shuffle() valid for an empty directory.
    shuffle_buffer = max(len(examples), 1)
    ds = ds.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=False).batch(BATCH_SIZE)

    val_ds = ds.take(10)
    test_ds = ds.skip(10).take(10)
    train_ds = ds.skip(20)

    return (train_ds, val_ds, test_ds)

@cached(cache={})
def load_discard_images():
    """Load every image under DISCARD_IMAGES_DIR as one batched dataset.

    Each file is binarized (any non-zero pixel becomes 255), converted to
    float32 and reshaped to (29, 40, 1), the same preprocessing that
    ``load_train_data`` applies. The dataset carries images only (no labels).

    The result is memoized by ``cached``.

    Returns:
        A ``tf.data.Dataset`` of image batches of size BATCH_SIZE.
    """
    garbage_image_arrays = []
    for dirpath, dirnames, filenames in os.walk(DISCARD_IMAGES_DIR):
        # Sort for a deterministic order; os.walk follows filesystem order.
        dirnames.sort()
        for filename in sorted(filenames):
            # Close the file handle as soon as the pixel data is extracted.
            with Image.open(os.path.join(dirpath, filename)) as img:
                binarized = img.point(lambda x: 255 if x > 0 else 0)
                garbage_image_arrays.append(
                    np.asarray(binarized, dtype=np.float32).reshape(29, 40, 1)
                )

    all_garbage_images = np.array(garbage_image_arrays)
    garbage_ds = tf.data.Dataset.from_tensor_slices(all_garbage_images)

    return garbage_ds.batch(BATCH_SIZE)

# Model config generator for parameter searches
The variables at the top of the generator define the parameter grid.
See the definition of `NetConfig` in `netconfig.py` for all the fields. I moved
that class into its own module because pickling and unpickling objects that
contain `NetConfig` instances was less problematic that way.

In [None]:
def config_generator():
  """Yield one NetConfig per point of the hyper-parameter grid.

  The grid is the Cartesian product of the value lists below, iterated with
  the last field (dense_size) varying fastest.
  """
  search_space = {
    'conv_layers': [1, 2],
    'conv_per_layer': [4, 6],
    'conv_kernel_shape': [(3, 3), (5, 5)],
    'dropout1': [.7, .8, .9],
    'dropout2': [.7, .8, .9],
    'dense_size': [256, 384, 512],
  }

  field_names = list(search_space)
  for combination in product(*search_space.values()):
    yield NetConfig(**dict(zip(field_names, combination)))

# Build a network and train a model according to a passed NetConfig
This returns the Keras `History` object from the call to `fit()`. The trained
model is available in the `model` attribute of this object.

In [None]:
def generate_model(config: NetConfig, train_ds, val_ds):
  """Build, compile and train a CNN described by ``config``.

  Architecture: ``config.conv_layers`` Conv2D layers (each with
  ``config.conv_per_layer`` filters of shape ``config.conv_kernel_shape``),
  one max-pooling layer, flatten, dropout(``dropout1``), a dense layer of
  ``config.dense_size`` units, dropout(``dropout2``) and an 8-way softmax.

  Args:
    config: Network hyper-parameters.
    train_ds: Batched dataset of (image, label) pairs used for fitting.
    val_ds: Batched dataset of (image, label) pairs passed as validation data.

  Returns:
    The History object returned by ``model.fit``; the trained model is
    available as its ``model`` attribute.
  """
  layers = [
    tf.keras.layers.Conv2D(config.conv_per_layer, config.conv_kernel_shape, padding='same', activation='relu')
    for _ in range(config.conv_layers)
  ]
  layers.extend([
    # A single pooling layer follows the whole convolution stack.
    tf.keras.layers.MaxPool2D(strides=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(config.dropout1),
    tf.keras.layers.Dense(config.dense_size, activation='relu'),
    tf.keras.layers.Dropout(config.dropout2),
    # The number of output classes is hard-coded to 8.
    tf.keras.layers.Dense(8, activation='softmax'),
  ])
  model = tf.keras.models.Sequential(layers)

  model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=.0005),
    # The last layer already applies softmax, so the model emits
    # probabilities, not logits; the loss must be told so.
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
  )

  log_dir = "logs/fit/"
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

  # NOTE(review): early stopping watches the training loss even though
  # validation data is supplied — confirm 'val_loss' is not what was intended.
  stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    mode='min',
    patience=10,
    verbose=1,
    restore_best_weights=True,
    start_from_epoch=100
  )

  history = model.fit(
      train_ds,
      epochs=200,
      validation_data=val_ds,
      callbacks=[
        stopping_callback,
        tensorboard_callback
      ],
      verbose=0,
  )
  return history

# Functions for parameter search

In [None]:
def train_and_evaluate_model(c: NetConfig):
    """Train a model for configuration ``c`` and score it.

    The model is trained with ``generate_model`` on the cached split from
    ``load_train_data`` and then evaluated on the test split and on the
    discard images.

    Args:
        c: Network configuration to train.

    Returns:
        A dict with the trained ``model``, the ``config``, the weighted/micro/
        macro F1 scores on the test split, the percentage of test predictions
        whose top probability exceeds 0.8 (``high_prob_good_percent``), the
        same percentage on the discard images (``high_prob_crap_percent``),
        and the value of every training-history metric at the last epoch run.
    """
    # Top-class probability above which a prediction counts as "confident".
    high_prob_threshold = .8

    (train_ds, val_ds, test_ds) = load_train_data()
    h = generate_model(c, train_ds, val_ds)
    model = h.model

    real_classes = []
    pred_classes = []
    count_high_prob = 0

    for batch in test_ds:
        preds = model.predict(batch[0], verbose=0)
        count_high_prob += np.count_nonzero(np.amax(preds, axis=1) > high_prob_threshold)
        pred_classes.extend(np.argmax(preds, axis=1))
        real_classes.extend(batch[1].numpy())

    weighted_f1 = f1_score(real_classes, pred_classes, average='weighted')
    micro_f1 = f1_score(real_classes, pred_classes, average='micro')
    macro_f1 = f1_score(real_classes, pred_classes, average='macro')

    discard_images = load_discard_images()
    discard_preds = model.predict(discard_images, verbose=0)
    discard_max = np.amax(discard_preds, axis=1)
    high_prob_crap_percent = (np.count_nonzero(discard_max > high_prob_threshold)/len(discard_max))*100

    ret = {
        'model': model,
        'config': c,
        'weighted_f1': weighted_f1,
        'micro_f1': micro_f1,
        'macro_f1': macro_f1,
        'high_prob_good_percent': (count_high_prob/len(pred_classes))*100,
        'high_prob_crap_percent': high_prob_crap_percent
    }
    # Report the metrics of the last epoch that actually ran. A fixed index
    # would describe an arbitrary mid-training epoch and would raise
    # IndexError whenever training ran for fewer epochs than that index.
    ret.update({k: v[-1] for k, v in h.history.items()})
    return ret

def param_search(max_configs=5):
    """Train and evaluate configurations from ``config_generator``.

    Args:
        max_configs: Number of configurations (taken from the start of the
            grid) to evaluate. Defaults to 5; pass ``None`` to search the
            whole grid.

    Returns:
        A list with one result dict per evaluated configuration, as produced
        by ``train_and_evaluate_model`` but without the ``model`` entry.
    """
    tf_init()
    rets = []
    configs = list(config_generator())
    for c in tqdm(configs[:max_configs]):
        r = train_and_evaluate_model(c)
        # Drop the trained model so only the config and metrics are kept.
        del r['model']
        print(r)
        rets.append(r)

    # with open("param_results.p", "wb") as f:
    #     pickle.dump(rets, f)

    # Hand the results back to the caller instead of discarding them.
    return rets

# Network parameter/config search
This will run if `SELECTED_MODEL` is set to `None` at the top of this notebook

In [None]:
# No single configuration was chosen, so run the parameter search instead.
if not SELECTED_MODEL:
    param_search()

# Functions for evaluating a single chosen model

In [None]:
def confusion_matrix_for_model(model, test_ds):
    """Draw the confusion matrix of ``model`` over ``test_ds``.

    ``model`` only needs a ``predict(images, verbose=0)`` method that returns
    per-class scores. ``test_ds`` is iterated as batches whose first element
    holds the images and whose second element holds the labels. The weighted
    F1 score is shown in the title.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure as current figure.
    """
    actual = []
    predicted = []
    for batch in test_ds:
        scores = model.predict(batch[0], verbose=0)
        predicted.extend(tf.argmax(scores, axis=1).numpy())
        actual.extend(batch[1].numpy())

    weighted_f1 = f1_score(actual, predicted, average='weighted')
    counts = tf.math.confusion_matrix(actual, predicted).numpy()

    fig, ax = plt.subplots(figsize=(7.5, 7.5))
    ax.matshow(counts, cmap=plt.cm.gray, alpha=0.3)
    # Write each cell's count on top of the shaded matrix.
    for row, col in np.ndindex(*counts.shape):
        ax.text(x=col, y=row, s=counts[row, col], va='center', ha='center', size='xx-large')

    ax.set_xlabel('Predictions', fontsize=18)
    ax.set_ylabel('Actuals', fontsize=18)
    ax.set_title(f'Confusion Matrix (weighted F1: {weighted_f1})', fontsize=18)

    return plt

def discard_histogram_for_model(model, discard_ds):
    """Plot which classes ``model`` confidently assigns to discard images.

    A prediction counts as confident when its highest class score is above
    0.8. The histogram shows the predicted class of every confident
    prediction; the title reports confident predictions as a percentage of
    all discard images.

    Args:
        model: Object with a ``predict(data, verbose=0)`` method returning
            per-class scores.
        discard_ds: Dataset of discard images passed to ``model.predict``.

    Returns:
        The matplotlib Axes the histogram was drawn on.
    """
    high_prob_threshold = .8

    preds = model.predict(discard_ds, verbose=0)
    max_arg = np.argmax(preds, axis=1)
    max_value = np.amax(preds, axis=1)
    confident = max_value > high_prob_threshold
    bad_args = max_arg[confident]

    high_prob_crap_percent = (np.count_nonzero(confident)/len(max_value))*100

    # Class ids are integers: discrete bins give exactly one bar per class,
    # centred on its tick, instead of automatically chosen continuous bins.
    ax = sns.histplot(
        bad_args,
        discrete=True,
    )
    ax.set_title(
        f"High probability predictions on discard pile\n({high_prob_crap_percent}%)"
    )
    # Half a unit of margin so the bars of the first and last class are not
    # cut in half by the axis limits.
    ax.set_xlim(-0.5, len(label_map) - 0.5)
    ax.set_xticks(range(0,len(label_map)))
    return ax

# Fit Model

In [None]:
# Train the selected configuration on the cached data split and keep the
# datasets around for the evaluation cells that follow.
if SELECTED_MODEL:
    (train_ds, val_ds, test_ds) = load_train_data()
    discard_ds = load_discard_images()
    history = generate_model(SELECTED_MODEL, train_ds=train_ds, val_ds=val_ds)
    # Saved to ./output, which is where the TFLite conversion cell loads the
    # model from.
    history.model.save('./output')

In [None]:
# Print the layer-by-layer summary of the trained model.
if SELECTED_MODEL:
    history.model.summary()

In [None]:
# # if SELECTED_MODEL:
# print(SELECTED_MODEL)
# from keras.utils.vis_utils import plot_model
# plot_model(history.model, show_shapes=True, show_layer_names=True)

In [None]:
# Confusion matrix of the trained Keras model on the held-out test split.
if SELECTED_MODEL:
    confusion_matrix_for_model(history.model, test_ds=test_ds)

In [None]:
# Histogram of confident predictions of the trained Keras model on the
# discard images.
if SELECTED_MODEL:
    discard_histogram_for_model(history.model, discard_ds=discard_ds)

# TFLite Conversion
This expects the TF model to be in `./output`

In [None]:
# Convert the model saved in ./output to a quantized TFLite model and write
# it to model.tflite.
if SELECTED_MODEL:
    def representative_data_gen():
        """Yield each training batch's images, wrapped in a one-element list."""
        (train_ds, _, _) = load_train_data()
        for batch in train_ds:
            # batch is (images, labels); only the images are handed over.
            yield [batch[0]]

    # Convert the model
    converter = tf.lite.TFLiteConverter.from_saved_model('./output')
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Sample inputs taken from the training split for the converter.
    converter.representative_dataset = representative_data_gen
    # Restrict the converted model to the int8 builtin op set.
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    # The converted model takes and returns uint8 tensors.
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8
    tflite_model = converter.convert()

    # Save the model.
    with open('model.tflite', 'wb') as f:
        f.write(tflite_model)

# Evaluate TFLite model the same way

In [None]:
class TFLiteModelWrapper:
    """
    Wrap a tflite model with something that provides a predict() method
    that works like a normal model so we can use our existing test functions
    against them
    """
    def __init__(self, path):
        """Load the TFLite model at ``path`` and allocate its tensors."""
        self.interpreter = tf.lite.Interpreter(path)
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.interpreter.allocate_tensors()

    @staticmethod
    def _quantize(image, details):
        """Turn one image into a batch of one in the model's input dtype."""
        batch = np.expand_dims(np.asarray(image), axis=0)
        dtype = details.get('dtype', np.uint8)
        if not np.issubdtype(dtype, np.integer):
            return batch.astype(dtype)
        scale, zero_point = details.get('quantization', (0.0, 0))
        if scale:
            # A scale of 0 means "no quantization parameters": cast as-is.
            batch = batch / scale + zero_point
        limits = np.iinfo(dtype)
        return np.clip(np.rint(batch), limits.min, limits.max).astype(dtype)

    @staticmethod
    def _dequantize(raw, details):
        """Scale a raw model output back to floats."""
        scale, zero_point = details.get('quantization', (0.0, 0))
        if scale:
            return (raw.astype(np.float32) - zero_point) * scale
        if np.issubdtype(raw.dtype, np.integer):
            # No quantization parameters reported: fall back to mapping the
            # 8-bit range onto [0, 1].
            return raw / 255
        return raw

    def predict(self, data, verbose=0):
        """Run the model on every image in ``data``, one image at a time.

        Args:
            data: Either one batch of images (anything with a ``shape``
                attribute, iterated along its first axis) or an iterable of
                such batches, e.g. a batched dataset of images.
            verbose: Accepted for compatibility with Keras ``predict``; unused.

        Returns:
            A float array with one row of per-class scores per input image.
            The leading axis is kept even when only a single image was given,
            so callers can always reduce over ``axis=1``.
        """
        if hasattr(data, 'shape'):
            batches = [data]
        else:
            # This is a batch dataset
            batches = list(data)

        input_info = self.input_details[0]
        output_info = self.output_details[0]

        outputs = []
        for batch in batches:
            for image in batch:
                self.interpreter.set_tensor(input_info['index'], self._quantize(image, input_info))
                self.interpreter.invoke()
                raw_output = self.interpreter.get_tensor(output_info['index'])
                # Scale back to a float so the comparison functions based on the
                # un-converted model will still work
                outputs.append(self._dequantize(raw_output, output_info))

        if not outputs:
            class_dims = tuple(output_info.get('shape', ())[1:])
            return np.empty((0,) + class_dims, dtype=np.float32)
        # Each output already has a leading batch axis of size 1; joining
        # along it (rather than squeezing) keeps a 2-D result for one image.
        return np.concatenate(outputs, axis=0)

# model.tflite is only written when SELECTED_MODEL is set; loading it
# unconditionally would fail on a missing file, or silently evaluate a stale
# one, when the notebook is run in parameter-search mode.
if SELECTED_MODEL:
    tfl = TFLiteModelWrapper('model.tflite')

In [None]:
# Same confusion-matrix evaluation as for the Keras model, now run through
# the TFLite wrapper.
if SELECTED_MODEL:
    confusion_matrix_for_model(tfl, test_ds)

In [None]:
# Same discard-pile histogram as for the Keras model, now run through the
# TFLite wrapper.
if SELECTED_MODEL:
    discard_histogram_for_model(tfl, discard_ds)

# Write out model CPP