In [None]:
# To enable faster auto-complete
%config Completer.use_jedi = False

In [None]:
import sys 
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import sys
import pickle
from utils import data_augmentation
from tensorflow import keras
from sklearn.model_selection import train_test_split, KFold
from sklearn.utils import shuffle
import datetime


In [None]:
# Choose which physical GPUs TensorFlow is allowed to use
gpus = tf.config.experimental.list_physical_devices("GPU")
print(gpus)
if len(gpus) > 0:
    try:
        # Expose at most the first four physical GPUs to TensorFlow
        visible_gpus = gpus[:4]
        tf.config.experimental.set_visible_devices(visible_gpus, "GPU")
        logical_gpus = tf.config.experimental.list_logical_devices("GPU")
        print(f"{len(gpus)} Physical GPUs {len(logical_gpus)} Logical GPU")
    except RuntimeError as err:
        # The failure is only reported, not re-raised (the original author notes that
        # GPUs must be set before initializing)
        print(err)

In [None]:
# Plotting of the confusion matrix
def confusion_matrix(model, X_test, y_test, class_names):
    """Plot a row-normalised confusion matrix of the model's class predictions as a heat map.

    Args:
        model: Object exposing ``predict(X_test)`` (presumably a Keras model). If ``predict``
            returns a list/tuple, the first element is used as the class scores.
        X_test: Inputs forwarded unchanged to ``model.predict``.
        y_test: True integer class labels; flattened to 1-D before use.
        class_names: Labels for the rows/columns of the matrix; its length sets the
            number of classes.

    Returns:
        None. The heat map is drawn on a new 8x8 matplotlib figure.
    """
    # `predict_classes` only existed on Sequential models and was removed in TF 2.6,
    # so the class indices are derived from the raw predictions instead.
    raw_pred = model.predict(X_test)
    if isinstance(raw_pred, (list, tuple)):
        # NOTE(review): assumes the first output of a multi-output model is the
        # classification head -- confirm against the model definition.
        raw_pred = raw_pred[0]
    raw_pred = np.asarray(raw_pred)
    if raw_pred.ndim > 1 and raw_pred.shape[-1] > 1:
        y_pred = np.argmax(raw_pred, axis=-1)
    else:
        # Single-unit output: same 0.5 threshold that `predict_classes` applied
        y_pred = (raw_pred > 0.5).astype("int32").reshape(-1)

    y_true = np.asarray(y_test).reshape(-1)
    # Fixing num_classes keeps the matrix aligned with class_names even when a class
    # is absent from the test data.
    con_mat = tf.math.confusion_matrix(labels=y_true, predictions=y_pred, num_classes=len(class_names)).numpy()

    # Normalizing the confusion matrix row-wise; rows without samples stay at zero
    row_totals = con_mat.sum(axis=1)[:, np.newaxis]
    con_mat_norm = np.divide(con_mat.astype('float'), row_totals,
                             out=np.zeros(con_mat.shape, dtype='float'), where=row_totals != 0)
    con_mat_norm = np.around(con_mat_norm, decimals=2)
    con_mat_df = pd.DataFrame(con_mat_norm, index=class_names, columns=class_names)

    # Plotting using a heat map
    plt.figure(figsize=(8,8))
    sns.set(font_scale = 2)
    sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues)
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    # Called after the labels are set so that they are included in the layout
    plt.tight_layout()

In [None]:
def check_create_dir(*args):
    """Create the directory obtained by joining ``args``, including missing parents.

    Args:
        *args: Path components, joined in order with ``os.path.join``.

    Returns:
        None. Nothing happens if the directory already exists.

    Raises:
        FileNotFoundError: If no components are given (the joined path is empty).
        FileExistsError: If the joined path exists but is not a directory.
    """
    # The leading "" keeps the no-argument case well-defined for os.path.join
    target_dir = os.path.join("", *args)

    # exist_ok=True avoids the race between a separate isdir() check and makedirs()
    os.makedirs(target_dir, exist_ok=True)

# Multi-output prediction model

- A single model identifies the axis (classification) and predicts the feed rate (regression)

- The axes that are detected are listed below

    - X-axis
    - Y-axis 
    - Z-axis
    - B-axis
    - C-axis

- The predicted feed rates cover the whole range for that particular axis

## Model architecture

In [None]:
# Simple model architecture
def build_multipred_model(input_shape, act_fn, compile_model=True, num_classes=5, learning_rate=0.0001):
    """Build the two-headed Conv1D network: axis classification plus feed-rate regression.

    Both heads share a convolutional trunk and branch off after its second pooling layer.

    Args:
        input_shape: Shape passed to ``keras.Input`` (the notebook uses ``(85, 3)``).
        act_fn: Indexable pair; ``act_fn[0]`` is the hidden-layer activation and
            ``act_fn[1]`` the activation of the ``axis_detection`` output layer.
        compile_model: When True, compile with Adam, sparse categorical cross-entropy
            for ``axis_detection`` and MSE for ``feed_rate_prediction``.
        num_classes: Number of units of the ``axis_detection`` output layer.
        learning_rate: Learning rate of the Adam optimizer used when compiling.

    Returns:
        A ``keras.Model`` named ``multi_output_model`` whose outputs are
        ``[axis_detection, feed_rate_prediction]``.
    """

    def _regularizer():
        # Every regularised layer uses the same L1/L2 penalty
        return keras.regularizers.l1_l2(l1=0.001, l2=0.001)

    def _conv(filters, kernel_size, tensor):
        # Same-padded, regularised Conv1D with the hidden activation
        return keras.layers.Conv1D(filters, kernel_size, activation=act_fn[0], padding="same",
                                   kernel_regularizer=_regularizer())(tensor)

    def _conv_stage(first_filters, second_filters, kernel_size, tensor):
        # Two convolutions followed by batch normalisation and a pooling of size 2
        tensor = _conv(first_filters, kernel_size, tensor)
        tensor = _conv(second_filters, kernel_size, tensor)
        tensor = keras.layers.BatchNormalization()(tensor)
        return keras.layers.MaxPool1D(2)(tensor)

    def _dense(units, tensor, activation=None):
        # Regularised fully connected layer
        return keras.layers.Dense(units, activation=activation, kernel_regularizer=_regularizer())(tensor)

    # Shared trunk
    input_layer = keras.Input(shape=input_shape, name="input")
    x = _conv_stage(1024, 512, 3, input_layer)
    # Where the split between two output happens
    split_layer = _conv_stage(512, 256, 2, x)

    # Classification head: which axis is moving
    x = keras.layers.Flatten()(split_layer)
    x = _dense(512, x, activation=act_fn[0])
    x = keras.layers.Dropout(0.7)(x)
    x = _dense(256, x, activation=act_fn[0])
    axis_detection = keras.layers.Dense(num_classes, activation=act_fn[1], name="axis_detection",
                                        kernel_regularizer=_regularizer())(x)

    # Regression head: feed rate
    y = split_layer
    for first_filters, second_filters in ((256, 128), (128, 64), (64, 32), (32, 16)):
        y = _conv_stage(first_filters, second_filters, 2, y)
    y = keras.layers.Flatten()(y)
    # NOTE(review): these two dense layers have no activation, unlike the classification
    # head; kept as in the original -- confirm whether linear layers are intended here.
    y = _dense(512, y)
    y = keras.layers.Dropout(0.5)(y)
    y = _dense(256, y)
    feed_rate_pred = keras.layers.Dense(1, activation=None, name="feed_rate_prediction")(y)

    # Get the model out
    model = keras.Model(input_layer, [axis_detection, feed_rate_pred], name="multi_output_model")

    if compile_model:
        # Optimizer
        adam = keras.optimizers.Adam(learning_rate=learning_rate)
        # Loss weights are keyed by output name, like the losses, so they cannot be
        # mismatched with the output order.
        model.compile(optimizer=adam,
                      loss={"axis_detection": "sparse_categorical_crossentropy", "feed_rate_prediction": "mse"},
                      metrics={"axis_detection": "accuracy"},
                      loss_weights={"axis_detection": 1, "feed_rate_prediction": 20})

    return model
    

## Loading the data

### Load without oversampling

### Load with oversampling

In [None]:
# Directory of the dataset
data_dir = os.path.join(os.getcwd(), "model_data", "multi_output_axisfr")

# Per-class pieces are collected first and concatenated once at the end,
# instead of re-copying the growing arrays with np.append on every step.
X_parts, y_axis_parts, y_fr_parts = [], [], []

# Sorted so that the sample order (and with it the seeded shuffle applied later) is reproducible
for file in sorted(os.listdir(data_dir)):
    # NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load trusted files.
    # The [()] index unwraps the 0-d object array into the stored object (used as a dict below).
    data = np.load(os.path.join(data_dir, file), allow_pickle=True)[()]

    # Random oversample the data
    data_ov = data_augmentation.naive_resampler(data, 1)

    # Ensure the count for all classes
    print(20 * "=")
    print(f"The name of the file - {file} ")
    for key, value in data_ov.items():
        print(f"The class {key} has a shape of {value.shape[0]}")

    # Each key is an (axis, feed_rate) pair; both labels are repeated once per sample
    for (axis, feed_rate), segmented_points in data_ov.items():
        n_samples = segmented_points.shape[0]
        y_axis_parts.append(np.repeat(axis, n_samples)[:, np.newaxis])
        y_fr_parts.append(np.repeat(feed_rate, n_samples)[:, np.newaxis])
        X_parts.append(segmented_points)

if not X_parts:
    raise FileNotFoundError(f"No samples could be loaded from {data_dir}")

# y
y_axis = np.concatenate(y_axis_parts, axis=0)
y_fr = np.concatenate(y_fr_parts, axis=0)
# X
X = np.concatenate(X_parts, axis=0)

In [None]:
# Make the generator for the data 
def make_generator(X, y_axis, y_fr, n_splits):
    """Wrap a K-fold split of the data in a ``tf.data.Dataset``.

    Args:
        X: Samples; indexed with the integer index arrays produced by ``KFold.split``.
        y_axis: Axis labels aligned with ``X`` along the first dimension.
        y_fr: Feed-rate targets aligned with ``X`` along the first dimension.
        n_splits: Number of folds passed to ``KFold``.

    Returns:
        A dataset whose elements are the 6-tuples
        ``(X_train, X_test, y_axis_train, y_axis_test, y_fr_train, y_fr_test)``,
        one element per fold.
    """

    def gen():
        for train_index, test_index in KFold(n_splits).split(X):
            # Yield the data every iteration
            yield (X[train_index], X[test_index],
                   y_axis[train_index], y_axis[test_index],
                   y_fr[train_index], y_fr[test_index])

    # Feed-rate targets use float32: float16 silently rounds values above 2048 and
    # overflows beyond 65504, which would corrupt the regression labels.
    output_types = (tf.float64, tf.float64, tf.int8, tf.int8, tf.float32, tf.float32)

    # Return as a tf dataset generator API
    return tf.data.Dataset.from_generator(gen, output_types)

In [None]:
# The samples were appended class by class, so they are shuffled (with a fixed seed)
# before the folds are cut. This is pretty important, otherwise the model will fail.
shuffled_arrays = shuffle(X, y_axis, y_fr, random_state=42)
X, y_axis, y_fr = shuffled_arrays

# Cross-validation dataset: one element per fold
cv_dataset = make_generator(X=X, y_axis=y_axis, y_fr=y_fr, n_splits=10)

# Alternative kept by the author: a single hold-out split instead of K-fold
# X, X_test, y_axis, y_axis_test, y_fr, y_fr_test = train_test_split(X, y_axis, y_fr, test_size=0.10)

## Build the model

### Using multiple GPUs

In [None]:
# Using multiple GPUs
mirrored_strategy = tf.distribute.MirroredStrategy()

# Model construction, optimizer creation and compile() all happen inside the strategy
# scope, as the TensorFlow distributed-training guide requires for anything that
# creates variables.
with mirrored_strategy.scope():
    # Build the model with appropriate parameters
    model = build_multipred_model((85, 3), ("relu", "softmax"), compile_model=False)

    # Optimizer
    adam = keras.optimizers.Adam(learning_rate=0.0001)
    # Compilation (loss weights keyed by output name, like the losses)
    model.compile(optimizer=adam,
                  loss={"axis_detection": "sparse_categorical_crossentropy", "feed_rate_prediction": "mse"},
                  metrics={"axis_detection": "accuracy"},
                  loss_weights={"axis_detection": 1, "feed_rate_prediction": 20})

### Using single GPU

In [None]:
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_loss", restore_best_weights=True, patience=100)

# save dir name
dir_name = "multi_output_ax-fr"
save_path = os.path.join(os.getcwd(), "model_weights", dir_name)
folder_time = datetime.datetime.now().isoformat()

# Create all required directories
check_create_dir(save_path, folder_time)

# Snapshot of the untrained weights, restored after every fold
initial_weights = model.get_weights()
# Notebook stdout, put back after every fit
nb_stdout = sys.stdout

history = {}
score_eval = {}
for fold, (X_train, X_test, y_axis_train, y_axis_test, y_fr_train, y_fr_test) in enumerate(cv_dataset):

    # Divert the training progress output to the console instead of the notebook
    console_out = open("/dev/stdout", "w")
    sys.stdout = console_out
    try:
        # Fit the model and save the training progress
        history[fold] = model.fit(X_train, {"axis_detection": y_axis_train, "feed_rate_prediction": y_fr_train},
                                  batch_size=1024, epochs=5000, shuffle=True, validation_split=0.30,
                                  callbacks=[early_stopping_cb])
    finally:
        # Bring the output back to the notebook even if fit() raises, and release the handle
        sys.stdout = nb_stdout
        console_out.close()

    # Print the test results
    score_eval[fold] = model.evaluate(X_test, {"axis_detection": y_axis_test, "feed_rate_prediction": y_fr_test})
    print(30*"=")
    print(f"The results for fold-{fold} is {score_eval[fold]}")

    # Save the trained model of this fold
    model.save(os.path.join(save_path, folder_time,  f"multi-output_KFold-{fold}_model.h5"))
    # Reset the model weights for the next fold
    # NOTE(review): this restores the layer weights only; the optimizer state is presumably
    # carried over between folds -- confirm whether that is intended.
    model.set_weights(initial_weights)
    

## Saving the model training history

In [None]:
# The training history of the model is saved for future reference
save_location = os.path.join(os.getcwd(), "model_weights", "multi_output_ax-fr", folder_time, "training_history")

# The "training_history" sub-directory is not created anywhere else, so make sure it
# exists before the files are opened for writing.
os.makedirs(save_location, exist_ok=True)

# history - dict (only the plain `.history` dict of every fold is pickled)
new_history = {key: value.history for key, value in history.items()}

with open(os.path.join(save_location, "history.pickle"), "wb") as fhandle:
    pickle.dump(new_history, fhandle, protocol=pickle.HIGHEST_PROTOCOL)

# score - dict
with open(os.path.join(save_location, "score.pickle"), "wb") as fhandle:
    pickle.dump(score_eval, fhandle, protocol=pickle.HIGHEST_PROTOCOL)

## Plotting the training history

In [None]:
# Read the pickled per-fold training history back from disk (if required)
load_location = os.path.join(
    os.getcwd(),
    "model_weights",
    "multi_output_ax-fr",
    folder_time,
    "training_history",
    "history.pickle",
)
with open(load_location, "rb") as history_file:
    loaded_history = pickle.load(history_file)

In [None]:
# Axis detection loss of the first fold: training vs. validation curve
fold_history = loaded_history[0]
for curve_key in ("axis_detection_loss", "val_axis_detection_loss"):
    plt.plot(fold_history[curve_key])
plt.title('Model - Axis detection loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(['Train', 'Validation'])

In [None]:
plt.cla()
# Axis detection accuracy of the first fold: training vs. validation curve
fold_history = loaded_history[0]
for curve_key in ("axis_detection_accuracy", "val_axis_detection_accuracy"):
    plt.plot(fold_history[curve_key])
plt.title('Model - Axis detection accuracy')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(['Train', 'Validation'])

In [None]:
plt.cla()
# Feed rate prediction loss of the first fold: training vs. validation curve
fold_history = loaded_history[0]
for curve_key in ("feed_rate_prediction_loss", "val_feed_rate_prediction_loss"):
    plt.plot(fold_history[curve_key])
plt.title('Model - Feed rate prediction loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(['Train', 'Validation'])

In [None]:
plt.cla()
# Cumulative (total) loss of the first fold: training vs. validation curve
fold_history = loaded_history[0]
for curve_key in ("loss", "val_loss"):
    plt.plot(fold_history[curve_key])
plt.title('Model - The cummulative loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(['Train', 'Validation'])