# Train the DriveSafe model with RNN (LSTM)

- Takes fixed-length video snippets as inputs and produces a label output
- Based on this [Towards Data Science article](https://towardsdatascience.com/recurrent-neural-networks-by-example-in-python-ffd204f99470)

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from IPython.core.interactiveshell import InteractiveShell
# from IPython.display import HTML

# Display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

In [4]:
# import the necessary packages
import argparse
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential

from utils.data import get_sequence_data

# Configurations

In [23]:
# Tunable settings used throughout the notebook

# sequence / training parameters
NUM_FRAMES = 2  # frames per sequence
NUM_EPOCHS = 25

# input and output locations
DATASET_PATH = "data"
OUTPUT_PATH = "model"

# class names handed to the data loader
LABELS = {"off", "collision", "safe", "tailgating", "weaving"}

# Load Input Data

In [25]:
# load sequence data (returns per-split collections keyed by "training" / "validation")
print("[INFO] loading sequence data...")
data, labels = get_sequence_data(DATASET_PATH, LABELS, NUM_FRAMES)

# convert the data and labels of each split to numpy arrays
training_data = np.array(data["training"])
training_labels = np.array(labels["training"])
validation_data = np.array(data["validation"])
validation_labels = np.array(labels["validation"])

# count number of sequences (each entry is one sequence, not one image)
print(f"[INFO] number of sequences in training_data: {len(training_data)}")
print(f"[INFO] number of sequences in validation_data: {len(validation_data)}")

# perform one-hot encoding on the labels
lb = LabelBinarizer()
training_labels = lb.fit_transform(training_labels)
# use transform (not fit_transform) so the validation labels are encoded with the
# same class-to-column mapping that was fitted on the training labels
validation_labels = lb.transform(validation_labels)

# the loader already provides a training/validation split, so no train_test_split
# is applied here; the validation split plays the role of the test set
trainX = training_data
testX = validation_data
trainY = training_labels
testY = validation_labels

# Reshape the input data to fit the LSTM's (samples, time steps, features) input:
# axis 0 stays the number of sequences, axis 1 becomes NUM_FRAMES time steps, and
# everything else is flattened into one feature vector per time step.
# NOTE(review): assumes each loaded sequence is shaped (NUM_FRAMES, height, width,
# channels) so the flatten keeps frames separate -- confirm against get_sequence_data
trainX = trainX.reshape(trainX.shape[0], NUM_FRAMES, -1)
testX = testX.reshape(testX.shape[0], NUM_FRAMES, -1)

[INFO] loading sequence data...
[INFO] number of images in training_data: 151
[INFO] number of images in validation_data: 151


# Train the Model

In [26]:
# initialize the model: one LSTM layer over the frame sequence, then dropout and a
# softmax classifier with one output unit per label class
model = Sequential()
# declare the (time steps, features) input shape with an explicit Input layer;
# passing `input_shape` to the first layer is deprecated in Keras 3 and emits a
# UserWarning
model.add(Input(shape=(trainX.shape[1], trainX.shape[2])))
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(Dense(len(lb.classes_), activation="softmax"))

# compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# train the model, validating on the held-out split after every epoch
print("[INFO] training model...")
history = model.fit(trainX, trainY, validation_data=(testX, testY), epochs=NUM_EPOCHS, batch_size=32)


  super().__init__(**kwargs)


[INFO] training model...
Epoch 1/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 312ms/step - accuracy: 0.2621 - loss: 1.8493 - val_accuracy: 0.3377 - val_loss: 1.4387
Epoch 2/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 265ms/step - accuracy: 0.3689 - loss: 1.4767 - val_accuracy: 0.4040 - val_loss: 1.3475
Epoch 3/25


KeyboardInterrupt: 

# Evaluate the Model

In [None]:
# evaluate the model
print("[INFO] evaluating model...")
predictions = model.predict(testX, batch_size=32)
# pass `labels` explicitly so the report still lines up with `target_names` when a
# class never appears in the validation split or in the predictions
print(classification_report(
    testY.argmax(axis=1),
    predictions.argmax(axis=1),
    labels=np.arange(len(lb.classes_)),
    target_names=lb.classes_,
))

# plot the training loss and accuracy
# use the number of epochs actually recorded rather than a configured value, so the
# x-axis matches the curves even if training stopped early
epochs_run = np.arange(len(history.history["loss"]))
plt.style.use("ggplot")
fig, ax = plt.subplots()
for key, label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    ax.plot(epochs_run, history.history[key], label=label)
ax.set_title("Training Loss and Accuracy")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss/Accuracy")
ax.legend(loc="lower left")

# save the figure next to the other outputs; create the directory if it is missing
os.makedirs(OUTPUT_PATH, exist_ok=True)
fig.savefig(os.path.join(OUTPUT_PATH, "plot.png"))

# Save Output to Disk

In [None]:
# serialize the model to disk
print("[INFO] serializing model...")
# make sure the output directory exists before writing into it
os.makedirs(OUTPUT_PATH, exist_ok=True)
model.save(os.path.join(OUTPUT_PATH, "drivesafe_lstm.keras"))

# serialize the label binarizer to disk so predictions can be mapped back to label
# names later with the same encoding
with open(os.path.join(OUTPUT_PATH, "label_binarizer.pickle"), "wb") as f:
    pickle.dump(lb, f)

# Early-stopping and Checkpoints
`patience` is the number of consecutive epochs without improvement in the monitored metric (`val_loss`) that are tolerated before training is stopped.

In [21]:
# Create callbacks (EarlyStopping / ModelCheckpoint are imported in the imports cell
# from tensorflow.keras, the same package the model and layers come from)
callbacks = [
    # stop once val_loss has not improved for 5 consecutive epochs
    EarlyStopping(monitor='val_loss', patience=5),
    # keep only the best full model (by val_loss) under the output directory
    ModelCheckpoint(os.path.join(OUTPUT_PATH, 'checkpoint.keras'),
                    monitor='val_loss', save_best_only=True, save_weights_only=False),
]

# Train the model

In [17]:
# Train with early stopping and checkpointing, using the arrays prepared in the
# data-loading cell. This continues training the `model` built above and replaces
# the earlier `history`.
# NOTE(review): batch_size=2048 is larger than the 151 training sequences reported
# above, so each epoch is a single full-batch step -- confirm this is intended
history = model.fit(trainX, trainY,
                    batch_size=2048, epochs=150,
                    callbacks=callbacks,
                    validation_data=(testX, testY))

NameError: name 'model' is not defined