# VideoNetClassification

Collaborators:

- Yahia Ehab
- Mariam Amr
- Mohamed Khaled

## Installation

In [84]:
# !pip install -q imageio
# !pip install -q opencv-python
# !pip install -q git+https://github.com/tensorflow/docs

In [85]:
import tensorflow as tf

print("Metal available:", tf.config.experimental.list_physical_devices("Metal"))
print("Metal device info:", tf.config.list_physical_devices("Metal"))


Metal available: []
Metal device info: []


## Imports

In [86]:
# @title Import the necessary modules
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3

## Data Loading

In [88]:
import pandas as pd

# Helper functions for the UCF101 dataset
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
_VIDEO_LIST = None
_CACHE_DIR = tempfile.mkdtemp()
unverified_context = ssl._create_unverified_context()

def list_ucf_videos():
    """Lists videos available in UCF101 dataset."""
    global _VIDEO_LIST
    if not _VIDEO_LIST:
        index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode("utf-8")
        videos = re.findall("(v_[\w_]+\.avi)", index)
        _VIDEO_LIST = sorted(set(videos))
    return list(_VIDEO_LIST)

def fetch_ucf_video(video):
    """Fetches a video and cache into local filesystem."""
    cache_path = os.path.join(_CACHE_DIR, video)
    if not os.path.exists(cache_path):
        urlpath = request.urljoin(UCF_ROOT, video)
        print("Fetching %s => %s" % (urlpath, cache_path))
        data = request.urlopen(urlpath, context=unverified_context).read()
        open(cache_path, "wb").write(data)
    return cache_path

def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]

#! Implemented before frame loading
# def load_video(path, max_frames=50, resize=(224, 224)):
#     cap = cv2.VideoCapture(path)
#     frames = []
#     try:
#         while True:
#             ret, frame = cap.read()
#             if not ret:
#                 break
#             frame = crop_center_square(frame)
#             frame = cv2.resize(frame, resize)
#             frame = frame[:, :, [2, 1, 0]]
#             frames.append(frame)

#             if max_frames != 0 and len(frames) == max_frames:
#                 break
#     finally:
#         cap.release()
#     return np.array(frames) / 255.0

def to_gif(images):
    converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
    imageio.mimsave('./animation.gif', converted_images, duration=40)
    return embed.embed_file('./animation.gif')

# Define a function to create DataFrame with video paths and labels
def create_dataframe(num_videos=300):
    video_paths = []
    labels = []

    # List all UCF101 videos
    ucf_videos = list_ucf_videos()

    # Randomly select videos
    random_videos = random.sample(ucf_videos, num_videos)

    # Extract labels from video filenames
    for video in random_videos:
        label = video.split('_')[1]
        video_paths.append(fetch_ucf_video(video))
        labels.append(label)

    # Create DataFrame
    df = pd.DataFrame({'video_paths': video_paths, 'labels': labels})
    return df

# Create DataFrame with video paths and labels
df = create_dataframe()

# # Display the DataFrame
# print(df.head())


Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_YoYo_g02_c01.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_YoYo_g02_c01.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_RockClimbingIndoor_g01_c01.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_RockClimbingIndoor_g01_c01.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_JugglingBalls_g07_c03.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_JugglingBalls_g07_c03.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_HeadMassage_g25_c02.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_HeadMassage_g25_c02.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_PlayingDaf_g23_c03.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_PlayingDaf_g23_c03.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_CliffDiving_g23_c03.avi => /var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/

In [89]:
import pandas as pd
# Save the DataFrame to a CSV file
df.to_csv('ucf101_videos_labels.csv', index=False)
#df = pd.read_csv('ucf101_videos_labels.csv')

In [90]:
df.head()

Unnamed: 0,video_paths,labels
0,/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn...,YoYo
1,/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn...,RockClimbingIndoor
2,/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn...,JugglingBalls
3,/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn...,HeadMassage
4,/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn...,PlayingDaf


In [91]:
df['video_paths'][0]

'/var/folders/qw/4tzkkgc16ldbw62c8_s7_fkh0000gn/T/tmp_z_tj6dn/v_YoYo_g02_c01.avi'

### Load Video as GIF

Create `/GIFs` dir

In [102]:
def load_video(path, max_frames=50, resize=(224, 224)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if max_frames != 0 and len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames) / 255.0

In [103]:
frames_clip = [] # a '2d' array where each element is a group of frames corresponding to one video
for i in range(0, 300):
    # Load the first video from the DataFrame
    video_path = df['video_paths'][i]
    video = load_video(video_path)
    converted_video = np.clip(video*255, 0, 255).astype(np.uint8)
    frames_clip.append(converted_video)
    print("finished video: ", i)

# extract labels from the dataframe
labels = df['labels'].values


finished video:  0
finished video:  1
finished video:  2
finished video:  3
finished video:  4
finished video:  5
finished video:  6
finished video:  7
finished video:  8
finished video:  9
finished video:  10
finished video:  11
finished video:  12
finished video:  13
finished video:  14
finished video:  15
finished video:  16
finished video:  17
finished video:  18
finished video:  19
finished video:  20
finished video:  21
finished video:  22
finished video:  23
finished video:  24
finished video:  25
finished video:  26
finished video:  27
finished video:  28
finished video:  29
finished video:  30
finished video:  31
finished video:  32
finished video:  33
finished video:  34
finished video:  35
finished video:  36
finished video:  37
finished video:  38
finished video:  39
finished video:  40
finished video:  41
finished video:  42
finished video:  43
finished video:  44
finished video:  45
finished video:  46
finished video:  47
finished video:  48
finished video:  49
finished v

In [104]:
num_of_classes = len(df['labels'].unique())

In [105]:
from tensorflow.keras.applications.inception_v3 import preprocess_input

preprocessed = []
for i in range(0, 300):
    preprocessed.append(preprocess_input(frames_clip[i]))
    print("finished video: ", i)

finished video:  0
finished video:  1
finished video:  2
finished video:  3
finished video:  4
finished video:  5
finished video:  6
finished video:  7
finished video:  8
finished video:  9
finished video:  10
finished video:  11
finished video:  12
finished video:  13
finished video:  14
finished video:  15
finished video:  16
finished video:  17
finished video:  18
finished video:  19
finished video:  20
finished video:  21
finished video:  22
finished video:  23
finished video:  24
finished video:  25
finished video:  26
finished video:  27
finished video:  28
finished video:  29
finished video:  30
finished video:  31
finished video:  32
finished video:  33
finished video:  34
finished video:  35
finished video:  36
finished video:  37
finished video:  38
finished video:  39
finished video:  40
finished video:  41
finished video:  42
finished video:  43
finished video:  44
finished video:  45
finished video:  46
finished video:  47
finished video:  48
finished video:  49
finished v

## Preprocessing

- CNN (InceptionV3 Model)
    1. Image Size should be 299*299 (only if we're using the full model)

- RNN
    1. LTSM

### CNN

In [106]:
# Load the InceptionV3 model from TensorFlow Hub
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4", trainable=False
)

In [107]:
# Function to extract features using InceptionV3
from tensorflow.keras.applications import InceptionV3

def extract_video_features(video_frames):
    base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
    video_features = []
    for video in video_frames:
        video_features.append(base_model.predict(video))
    return np.array(video_features)

video_features = extract_video_features(preprocessed)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 317ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 315ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 318ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 313ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 319ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 338ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 340ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 320ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 326ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 323ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 348ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 336ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [108]:
split = int(0.8*len(video_features))
train, test = video_features[:split], video_features[split:]
train_labels, test_labels = labels[:split], labels[split:]

In [109]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Create a LabelEncoder object
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
train_labels_onehot = to_categorical(train_labels_encoded)

test_labels_encoded = label_encoder.fit_transform(test_labels)
test_labels_onehot = to_categorical(test_labels_encoded)


In [110]:
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import sequence

# RNN model creation
rnn_model = Sequential(
    [
        LSTM(128, return_sequences=True, input_shape=(30, 2048)),
        LSTM(64),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(93, activation="softmax"),
    ]
)

rnn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

print(train.shape) # (240, 30, 2048)
print(rnn_model.input_shape) # (None, 30, 2048)
print(train_labels_onehot.shape) # (240, 93)
print(rnn_model.output_shape) # (None, 93)

(240, 50, 2048)
(None, 30, 2048)
(240, 90)
(None, 93)


  super().__init__(**kwargs)


In [111]:
# from keras.models import Sequential
# from keras.layers import SimpleRNN, Dense

# rnn_model = Sequential()
# rnn_model.add(
#     SimpleRNN(50, input_shape=(None, 2048))
# )  # Adjust based on your specific model
# rnn_model.add(
#     Dense(93, activation="softmax")
# )  # Adjust the number of neurons to match the number of classes

# rnn_model.compile(
#     loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
# )

In [114]:
# Check the number of unique classes in your dataset
num_classes = len(set(train_labels))
print("Number of classes:", num_classes)

# Reshape the training and testing data for RNN input
sequence_length = 50

train_reshaped = train.reshape(-1, sequence_length, 2048)
test_reshaped = test.reshape(-1, sequence_length, 2048)

# RNN model creation
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

rnn_model = Sequential()
rnn_model.add(SimpleRNN(50, input_shape=(sequence_length, 2048)))
rnn_model.add(Dense(num_classes, activation="softmax"))

rnn_model.compile(
    loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
)

# Train the model
rnn_model.fit(
    x=train_reshaped,
    y=train_labels_onehot,
    epochs=15,
    batch_size=64,
    validation_split=0.2,
)

Number of classes: 90
Epoch 1/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.0124 - loss: 4.6206 - val_accuracy: 0.0208 - val_loss: 4.7979
Epoch 2/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.0638 - loss: 4.1867 - val_accuracy: 0.0417 - val_loss: 4.6465
Epoch 3/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.1751 - loss: 3.8338 - val_accuracy: 0.0417 - val_loss: 4.4477
Epoch 4/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.3132 - loss: 3.5005 - val_accuracy: 0.0417 - val_loss: 4.2687
Epoch 5/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.5514 - loss: 3.1905 - val_accuracy: 0.1042 - val_loss: 4.2561
Epoch 6/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.6934 - loss: 2.9456 - val_accuracy: 0.1042 - val_loss: 4.2288
Epoch 7/15
[1m3/3[0m 

<keras.src.callbacks.history.History at 0x13063dcf0>

In [115]:
test_labels_onehot = to_categorical(test_labels_encoded, num_classes=num_classes)

# Evaluate the model
evaluation = rnn_model.evaluate(x=test_reshaped, y=test_labels_onehot)
print("Evaluation results:", evaluation)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0431 - loss: 4.6558 
Evaluation results: [4.716806888580322, 0.03333333507180214]
