In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [3]:
dataset_path = os.listdir('/content/drive/MyDrive/train_vid')

label_types = os.listdir('/content/drive/MyDrive/train_vid')
print (label_types)

['Right Turn', 'Slow-Stop', 'Straight', 'Left Turn', 'Left Lane Change', 'Right Lane Change']


**Preparing Training Data**

In [4]:
rooms = []

for item in dataset_path:
 # Get all the file names
 all_rooms = os.listdir('/content/drive/MyDrive/train_vid' + '/' +item)

 # Add them to the list
 for room in all_rooms:
    rooms.append((item, str('/content/drive/MyDrive/train_vid' + '/' +item) + '/' + room))

# Build a dataframe
train_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(train_df.head())
print(train_df.tail())

          tag                                         video_name
0  Right Turn  /content/drive/MyDrive/train_vid/Right Turn/03...
1  Right Turn  /content/drive/MyDrive/train_vid/Right Turn/0f...
2  Right Turn  /content/drive/MyDrive/train_vid/Right Turn/23...
3  Right Turn  /content/drive/MyDrive/train_vid/Right Turn/3b...
4  Right Turn  /content/drive/MyDrive/train_vid/Right Turn/3c...
                   tag                                         video_name
402  Right Lane Change  /content/drive/MyDrive/train_vid/Right Lane Ch...
403  Right Lane Change  /content/drive/MyDrive/train_vid/Right Lane Ch...
404  Right Lane Change  /content/drive/MyDrive/train_vid/Right Lane Ch...
405  Right Lane Change  /content/drive/MyDrive/train_vid/Right Lane Ch...
406  Right Lane Change  /content/drive/MyDrive/train_vid/Right Lane Ch...


In [5]:
df = train_df.loc[:,['video_name','tag']]
df
df.to_csv('/content/drive/MyDrive/train.csv')

**Preparing Test Data**

In [6]:
dataset_path = os.listdir('/content/drive/MyDrive/test_vid')
print(dataset_path)

room_types = os.listdir('/content/drive/MyDrive/test_vid')
print("Types of activities found: ", len(dataset_path))

rooms = []

for item in dataset_path:
 # Get all the file names
 all_rooms = os.listdir('/content/drive/MyDrive/test_vid' + '/' +item)

 # Add them to the list
 for room in all_rooms:
    rooms.append((item, str('/content/drive/MyDrive/test_vid' + '/' +item) + '/' + room))

# Build a dataframe
test_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(test_df.head())
print(test_df.tail())

df = test_df.loc[:,['video_name','tag']]
df
df.to_csv('/content/drive/MyDrive/test.csv')

['Right Turn', 'Slow-Stop', 'Straight', 'Left Turn', 'Left Lane Change', 'Right Lane Change']
Types of activities found:  6
          tag                                         video_name
0  Right Turn  /content/drive/MyDrive/test_vid/Right Turn/0f5...
1  Right Turn  /content/drive/MyDrive/test_vid/Right Turn/248...
2  Right Turn  /content/drive/MyDrive/test_vid/Right Turn/510...
3  Right Turn  /content/drive/MyDrive/test_vid/Right Turn/6b6...
4  Right Turn  /content/drive/MyDrive/test_vid/Right Turn/8ba...
                  tag                                         video_name
91  Right Lane Change  /content/drive/MyDrive/test_vid/Right Lane Cha...
92  Right Lane Change  /content/drive/MyDrive/test_vid/Right Lane Cha...
93  Right Lane Change  /content/drive/MyDrive/test_vid/Right Lane Cha...
94  Right Lane Change  /content/drive/MyDrive/test_vid/Right Lane Cha...
95  Right Lane Change  /content/drive/MyDrive/test_vid/Right Lane Cha...


In [None]:
!pip install git+https://github.com/tensorflow/docs

In [8]:
from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2
import os

In [9]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        print(e)

In [10]:
train_df = pd.read_csv("/content/drive/MyDrive/train.csv")
test_df = pd.read_csv("/content/drive/MyDrive/test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")


train_df.sample(10)

Total videos for training: 407
Total videos for testing: 96


Unnamed: 0.1,Unnamed: 0,video_name,tag
137,137,/content/drive/MyDrive/train_vid/Slow-Stop/db7...,Slow-Stop
61,61,/content/drive/MyDrive/train_vid/Right Turn/d2...,Right Turn
111,111,/content/drive/MyDrive/train_vid/Right Turn/6b...,Right Turn
268,268,/content/drive/MyDrive/train_vid/Left Turn/c06...,Left Turn
294,294,/content/drive/MyDrive/train_vid/Left Turn/0c2...,Left Turn
218,218,/content/drive/MyDrive/train_vid/Straight/f1eb...,Straight
100,100,/content/drive/MyDrive/train_vid/Right Turn/20...,Right Turn
351,351,/content/drive/MyDrive/train_vid/Left Lane Cha...,Left Lane Change
48,48,/content/drive/MyDrive/train_vid/Right Turn/fa...,Right Turn
181,181,/content/drive/MyDrive/train_vid/Straight/a368...,Straight


In [11]:
IMG_SIZE = 224

def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)

**Feature Extraction**

In [12]:
from tensorflow import keras
def build_feature_extractor():
    feature_extractor = keras.applications.VGG16(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.vgg16.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")

feature_extractor = build_feature_extractor()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


**Label Encoding**

In [13]:
label_processor = keras.layers.StringLookup(num_oov_indices=0, vocabulary=np.unique(train_df["tag"]))
print(label_processor.get_vocabulary())

labels = train_df["tag"].values
labels = label_processor(labels[..., None]).numpy()
labels

['Left Lane Change', 'Left Turn', 'Right Lane Change', 'Right Turn', 'Slow-Stop', 'Straight']


array([[3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
       [3],
    

In [18]:
#hyperparameters
IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 100
MAX_SEQ_LENGTH = 20
NUM_FEATURES = 512

In [19]:
def prepare_all_videos(df, root_dir):
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()

    ##take all classlabels from train_df column named 'tag' and store in labels
    labels = df["tag"].values

    #convert classlabels to label encoding
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool") # 145,20
    frame_features = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32") #145,20,2048

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        frames = load_video(os.path.join(root_dir, path))
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :]
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks), labels


train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")

print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")



print(f"train_labels in train set: {train_labels.shape}")

print(f"test_labels in train set: {test_labels.shape}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Frame features in train set: (407, 20, 512)
Frame masks in train set: (407, 20)
train_labels in train set: (407, 1)
test_labels in train set: (96, 1)


In [21]:
def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
    x = keras.layers.GRU(8)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Dense(8, activation="relu")(x)
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model

EPOCHS = 30
# Utility for running experiments.
def run_experiment():
    filepath = "./tmp/video_classifier"
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )

    seq_model = get_sequence_model()
    history = seq_model.fit(
        [train_data[0], train_data[1]],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[checkpoint],
    )

    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


_, sequence_model = run_experiment()

Epoch 1/30
Epoch 1: val_loss improved from inf to 2.21093, saving model to ./tmp/video_classifier
Epoch 2/30
Epoch 2: val_loss did not improve from 2.21093
Epoch 3/30
Epoch 3: val_loss did not improve from 2.21093
Epoch 4/30
Epoch 4: val_loss did not improve from 2.21093
Epoch 5/30
Epoch 5: val_loss did not improve from 2.21093
Epoch 6/30
Epoch 6: val_loss did not improve from 2.21093
Epoch 7/30
Epoch 7: val_loss did not improve from 2.21093
Epoch 8/30
Epoch 8: val_loss did not improve from 2.21093
Epoch 9/30
Epoch 9: val_loss did not improve from 2.21093
Epoch 10/30
Epoch 10: val_loss did not improve from 2.21093
Epoch 11/30
Epoch 11: val_loss did not improve from 2.21093
Epoch 12/30
Epoch 12: val_loss did not improve from 2.21093
Epoch 13/30
Epoch 13: val_loss did not improve from 2.21093
Epoch 14/30
Epoch 14: val_loss did not improve from 2.21093
Epoch 15/30
Epoch 15: val_loss did not improve from 2.21093
Epoch 16/30
Epoch 16: val_loss did not improve from 2.21093
Epoch 17/30
Epoch 