#SYSC 5906:
## **Room Detection - V2.0**

---
Script to train and validate a **Keras model** to perform room detection based on a processed version of the MIT Indoor Scenes dataset.

###Step 1: Access Drive
Mount the drive with the provided .zip file of code located in it

In [None]:
#Enter the gdrive
#Mount Google Drive at /gdrive inside the Colab runtime; the dataset and model
#directories used by later cells all live under this mount point.
#force_remount=True requests a fresh mount on every run of this cell.
from google.colab import drive
drive.mount('/gdrive',force_remount=True)

Mounted at /gdrive


##Step 2: Setup
Import relevant libraries, load the original dataset into Colab (without the images) and get the folders inside

In [None]:
#Get tensorFlow built in datasets
#NOTE(review): tfds-nightly and tensorflow-datasets are both requested here and appear to
#provide the same tensorflow_datasets package - confirm that both are really needed.
!pip install -q tfds-nightly tensorflow tensorflow-datasets
#NOTE(review): github-clone is installed but not used by any visible cell - confirm it is still required.
!pip install github-clone

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import tensorflow as tf
import pandas as pd
import pickle
import sklearn as sk
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_datasets as tfds
import keras.api._v2.keras as keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from keras.layers import Activation, Dense, Dropout, Input, Flatten, Embedding, Masking, LSTM, Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from keras.models import Sequential, Model
from keras.optimizers import adam_v2
from keras.engine.input_layer import InputLayer
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.backend import flatten
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

#Folder on the mounted drive that the trained model is saved into (see Step 7)
MODEL_DIRECTORY = '/gdrive/My Drive/Colab Notebooks/SYSC 5906/models/weights/'
#Folder on the mounted drive that holds the pickled dataset files read in Step 2.
#The commented-out line is an alternative dataset folder; swap the two to change datasets.
#PICKLE_DIRECTORY = '/gdrive/My Drive/Colab Notebooks/SYSC 5906/datasets/mit_indoors/processed/data_subset/7 classes/'
PICKLE_DIRECTORY = '/gdrive/My Drive/Colab Notebooks/SYSC 5906/datasets/mit_indoors/processed/data_labelsOnly/'

**Get the datasets:**

In [None]:
#Import dataset
#NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
#The files are opened with `with` so they are closed even if unpickling fails.
with open(PICKLE_DIRECTORY+"listOfAllObj_v3.pkl","rb") as pickledData:
    dataSet = pickle.load(pickledData)

#Import list of unique objects from training dataset
with open(PICKLE_DIRECTORY+"uniqueObjs_v3.pkl","rb") as pickledObjs:
    uniqueObjs = pickle.load(pickledObjs)
#The pickled list is immediately replaced by the dataset's own predictor columns
#(every column except the last one, which is used as the target in the next cell).
#NOTE(review): this makes the load above redundant - confirm which source is intended.
uniqueObjs = dataSet.columns[0:-1]

In [None]:
#Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable
training_set, validation_set = train_test_split(
    dataSet,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
)

#Every column except the last is a predictor (X, the input data);
#the last column is the target (Y, the answer to classify)
x_train, y_train = training_set.iloc[:, :-1], training_set.iloc[:, -1]
x_val, y_val = validation_set.iloc[:, :-1], validation_set.iloc[:, -1]

#Tensor versions of each subset, used for training and evaluation
x_train_tensor, y_train_tensor = tf.constant(x_train), tf.constant(y_train)
x_val_tensor, y_val_tensor = tf.constant(x_val), tf.constant(y_val)

#Number of classes to detect based on the dataset imported
NUM_CLASSES = 7 #12 for main subset, 7 for more balanced version
#Number of input features, i.e. the number of predictor columns
NUM_OBJ_TYPES = x_train_tensor.shape[1]

print(x_train_tensor.shape)

(1095, 2204)


##Step 3: Build the Models
Define all models to be trained and tested

##**MODEL 1: STATIC**
A basic **sequential FNN model**

In [None]:
#MODEL #3
MODEL_ID = 'room_classifier_3'

#Assemble the feed-forward network in one pass:
#input -> flatten -> three ReLU hidden layers -> softmax over the room classes
model = Sequential([
    InputLayer(input_shape=(NUM_OBJ_TYPES,1), name='Input_Layer'),
    tf.keras.layers.Flatten(), #MODEL DOES NOT WORK WITHOUT THIS FLATTEN!!!!
    Dense(64, activation='relu', name='Hidden_Layer_1'),
    Dense(32, activation='relu', name='Hidden_Layer_2'),
    # Dense(20, activation='relu', name='Hidden_Layer_3'),
    Dense(10, activation='relu', name='Hidden_Layer_4'),
    Dense(NUM_CLASSES, activation='softmax', name='Output_Layer'),
])

#Print the layer table and write a diagram of the architecture to model.png
print(model.summary())
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    dpi=100,
    show_layer_names=True,
)

#Integer class labels + softmax outputs -> sparse categorical cross-entropy on probabilities
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
#Alternative hand-tuned optimizer, currently unused:
#opt = tf.keras.optimizers.Adam(learning_rate=0.00005,beta_1=0.9,beta_2=0.99,epsilon=1e-7,amsgrad=True)

#Compile with the default Adam optimizer and track accuracy
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 2204)              0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 64)                141120    
                                                                 
 Hidden_Layer_2 (Dense)      (None, 32)                2080      
                                                                 
 Hidden_Layer_4 (Dense)      (None, 10)                330       
                                                                 
 Output_Layer (Dense)        (None, 7)                 77        
                                                                 
Total params: 143,607
Trainable params: 143,607
Non-trainable params: 0
_________________________________________________________________
None


##**MODEL 2:** 
Basic **sequential FNN model** with 4 hidden layers and a dropout layer before the output

In [None]:
#MODEL #2
MODEL_ID = 'room_classifier_2.sav'

#Build a model
model = Sequential()

#Define the model layers
#NOTE(review): the input shape is hard-coded to (111,3), while the other FNN models use
#(NUM_OBJ_TYPES,1) - confirm it matches the dataset loaded in Step 2 before training.
model.add(InputLayer(input_shape=(111,3),ragged=True, name='Input_Layer'))
model.add(tf.keras.layers.Flatten()) #MODEL DOES NOT WORK WITHOUT THIS FLATTEN!!!!
model.add(Dense(795, activation='relu', name='Hidden_Layer_1'))
model.add(Dense(512, activation='relu', name='Hidden_Layer_2'))
model.add(Dense(256, activation='relu', name='Hidden_Layer_3'))
model.add(Dense(128, activation='relu', name='Hidden_Layer_4'))
model.add(Dropout(0.2)) #Dropout hidden layer during training
model.add(Dense(NUM_CLASSES, activation='softmax', name='Output_Layer'))

#Visualize the model
print(model.summary())
plot_model(model, to_file='model.png', show_shapes=True, dpi=100, show_layer_names=True)

#Define a loss function for training the model
#The output layer already applies softmax, so the predictions are probabilities and
#the loss must not treat them as logits (from_logits=False, as in the other models).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

#Configure and compile the model
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_13 (Flatten)        (None, 333)               0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 795)               265530    
                                                                 
 Hidden_Layer_2 (Dense)      (None, 512)               407552    
                                                                 
 Hidden_Layer_3 (Dense)      (None, 256)               131328    
                                                                 
 Hidden_Layer_4 (Dense)      (None, 128)               32896     
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 Output_Layer (Dense)        (None, 12)              

##**MODEL 3:** 
Basic **sequential FNN model** without any dropout and with 3 hidden layers

In [None]:
#MODEL #3
MODEL_ID = 'room_classifier_3.sav'

#Start from an empty sequential model and grow it layer by layer
model = Sequential()
model.add(InputLayer(input_shape=(NUM_OBJ_TYPES,1), name='Input_Layer'))
model.add(tf.keras.layers.Flatten()) #MODEL DOES NOT WORK WITHOUT THIS FLATTEN!!!!

#ReLU hidden layers, widest first (a 20-unit 'Hidden_Layer_3' was tried and is disabled)
for hidden_name, hidden_units in (
    ('Hidden_Layer_1', 64),
    ('Hidden_Layer_2', 32),
    ('Hidden_Layer_4', 10),
):
    model.add(Dense(hidden_units, activation='relu', name=hidden_name))

#Softmax output with one unit per room class
model.add(Dense(NUM_CLASSES, activation='softmax', name='Output_Layer'))

#Print the layer table and write a diagram of the architecture to model.png
print(model.summary())
plot_model(model, to_file='model.png', show_shapes=True,
           dpi=100, show_layer_names=True)

#Loss for integer labels compared against softmax probabilities
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
#Alternative hand-tuned optimizer, currently unused:
#opt = tf.keras.optimizers.Adam(learning_rate=0.00005,beta_1=0.9,beta_2=0.99,epsilon=1e-7,amsgrad=True)

#Compile with the default Adam optimizer and track accuracy
model.compile(optimizer='adam', loss=loss_fn,
              metrics=['accuracy'])

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 2204)              0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 64)                141120    
                                                                 
 Hidden_Layer_2 (Dense)      (None, 32)                2080      
                                                                 
 Hidden_Layer_4 (Dense)      (None, 10)                330       
                                                                 
 Output_Layer (Dense)        (None, 7)                 77        
                                                                 
Total params: 143,607
Trainable params: 143,607
Non-trainable params: 0
_________________________________________________________________
None


##**MODEL 6:** 
A **sequential RNN model** without hidden dropout layers and with 4 hidden layers (an LSTM, two dense layers and a flatten)

In [None]:
#MODEL #6
MODEL_ID = 'room_classifier_6.sav'

#Recurrent stack: a (50,50) input feeds an LSTM that returns the full sequence, two dense
#layers follow, and the flattened result goes to the softmax output layer.
#Alternatives that were tried and are disabled: an Embedding/Masking front end, an LSTM
#declared with its own input_shape=(5,5), a Dropout(0.8) before the output, a single-unit
#output layer, and a functional-API Model(inputs=visible_layer, outputs=output_layer).
model = Sequential([
    InputLayer(input_shape=(50,50),ragged=False, name='Input_Layer'),
    LSTM(50, return_sequences=True,dropout=0.1, recurrent_dropout=0.1),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Flatten(),
    Dense(NUM_CLASSES, activation='softmax'),
])

#Print the layer table and write a diagram of the architecture to model_rnn.png
print(model.summary())
plot_model(
    model,
    to_file='model_rnn.png',
    show_shapes=True,
    dpi=100,
    show_layer_names=True,
)

#Loss for integer labels compared against softmax probabilities
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
#Alternative hand-tuned optimizer, currently unused:
#opt = tf.keras.optimizers.Adam(learning_rate=0.0005,beta_1=0.9,beta_2=0.99,epsilon=1e-7,amsgrad=True)

#Compile with the default Adam optimizer and track accuracy
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 50, 50)            20200     
                                                                 
 dense_3 (Dense)             (None, 50, 50)            2550      
                                                                 
 dense_4 (Dense)             (None, 50, 25)            1275      
                                                                 
 flatten_2 (Flatten)         (None, 1250)              0         
                                                                 
 dense_5 (Dense)             (None, 7)                 8757      
                                                                 
Total params: 32,782
Trainable params: 32,782
Non-trainable params: 0
_________________________________________________________________
None


##**MODEL 7:** 
An **ensemble model** with two sub-models, each with 3 hidden layers and no dropout

In [None]:
#MODEL #7 --- ENSEMBLE VERSION
MODEL_ID = 'room_classifier_7.sav'

#Input shape shared by both sub-models AND by the ensemble input, so the three always agree
#NOTE(review): (111,3) is hard-coded, while the other FNN models use (NUM_OBJ_TYPES,1) -
#confirm it matches the dataset loaded in Step 2 before training.
ENSEMBLE_INPUT_SHAPE = (111,3)


def _build_sub_model():
    """Return one uncompiled ensemble member: an FNN with 3 hidden layers and no dropout."""
    sub_model = Sequential()
    sub_model.add(InputLayer(input_shape=ENSEMBLE_INPUT_SHAPE,ragged=True, name='Input_Layer'))
    sub_model.add(tf.keras.layers.Flatten()) #MODEL DOES NOT WORK WITHOUT THIS FLATTEN!!!!
    sub_model.add(Dense(512, activation='relu', name='Hidden_Layer_1'))
    sub_model.add(Dense(256, activation='relu', name='Hidden_Layer_2'))
    sub_model.add(Dense(128, activation='relu', name='Hidden_Layer_3'))
    sub_model.add(Dense(NUM_CLASSES, activation='softmax', name='Output_Layer'))
    return sub_model


#### Model 1
model_1 = _build_sub_model()

### Model 2
model_2 = _build_sub_model()

#Combine the models in a ensemble
#Both sub-models see the same input and their softmax outputs are averaged
sub_models = [model_1, model_2]
model_input = tf.keras.Input(shape=ENSEMBLE_INPUT_SHAPE, ragged=True)
model_outputs = [m(model_input) for m in sub_models]
ensemble_output = tf.keras.layers.Average()(model_outputs)
ensemble_model = tf.keras.Model(inputs=model_input, outputs=ensemble_output)

#Later cells train, evaluate and save whatever `model` refers to, so bind it to the
#ensemble itself (a plain list of sub-models has no summary()/compile()/fit()).
model = ensemble_model

#Visualize the model
print(model.summary())
plot_model(model, to_file='model.png', show_shapes=True, dpi=100, show_layer_names=True)

#Define a loss function for training the model
#The averaged softmax outputs are probabilities, not logits, hence from_logits=False
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

#Configure and compile the model
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

##Step 4: Training
Train the models. Each model is trained on the portion
of the data that is designated as the training dataset

Train the custom **classifiers**:

In [None]:
# Callbacks handed to model.fit in the next cell
# Early stopping watches the training loss with a patience of 5 epochs
earlyStop = EarlyStopping(
    monitor='loss',
    patience=5,
)
# Checkpointing of the best model is disabled for now (TODO: Fix this):
#modelChkPnt = ModelCheckpoint((MODEL_DIRECTORY+"CHKPNT_"+MODEL_ID), monitor='val_acc', verbose=1, save_best_only=True, mode='max')

callbacks = [
    earlyStop,
]

In [None]:
#Train the model on the dataset
#Runs for at most 100 epochs (the callbacks may stop earlier) and reports metrics on the
#validation subset after every epoch.
history = model.fit(x_train_tensor, y_train_tensor, epochs=100,
          callbacks=callbacks, validation_data=(x_val_tensor,y_val_tensor)) #7011, cols in data = 2337
print(history)

#Expose the model under the name used for probability outputs
#Every model defined in Step 3 already ends in a softmax layer, so its outputs are
#probabilities; stacking another Softmax layer on top would distort them, so the
#trained model is used directly.
probability_model = model

##Step 5: Validation
Validate the models on the validation dataset

In [None]:
#Score the trained model on the validation subset, then collect its raw predictions
model.evaluate(x_val_tensor,  y_val_tensor, verbose=2)
probability_model(x_val_tensor[:5])
y_pred = model.predict(x_val_tensor)
print(x_val_tensor)

#The predicted class of each sample is the index of its highest output probability
y_classes = [class_probs.argmax() for class_probs in y_pred]
print(y_classes)

##Step 6: Results
Report the results: determine the accuracy of the models' predictions on
the validation dataset

In [None]:
#Default size (width, height in inches) for figures created from here on
plt.rc('figure', figsize=(100,100))

#Confusion matrix plot: true validation labels against predicted classes
ConfusionMatrixDisplay.from_predictions(y_true=y_val, y_pred=y_classes)
plt.show()

#The same matrix as a plain tensor printout
print(tf.math.confusion_matrix(labels=y_val, predictions=y_classes))

In [None]:
#Summary metrics on the validation subset
# 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
# 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
# Each score is computed and printed in turn so the output order is unchanged.
accuracy = sk.metrics.accuracy_score(y_val, y_classes)
print(f"Accuracy on validation data: {accuracy:f}")

balanced_accuracy = sk.metrics.balanced_accuracy_score(y_val, y_classes)
print(f"Balanced accuracy on validation data: {balanced_accuracy:f}")

macro_recall = sk.metrics.recall_score(y_val, y_classes, average='macro', zero_division=0)
print(f"Recall score on validation data: {macro_recall:f}")

macro_precision = sk.metrics.precision_score(y_val, y_classes, average='macro', zero_division=0)
print(f"Precision score on validation data: {macro_precision:f}")

macro_f1 = sk.metrics.f1_score(y_val, y_classes, average='macro')
print(f"F1 score on validation data: {macro_f1:f}")

##Step 7: Exporting & Deploying
Save the trained model to the drive using Keras `model.save`

In [None]:
#Save the current model
#The target path is MODEL_DIRECTORY followed by MODEL_ID, i.e. the ID assigned by
#whichever model cell in Step 3 was run last.
model.save(MODEL_DIRECTORY+MODEL_ID)

