# VideoNetClassification

Collaborators:

- Yahia Ehab
- Mariam Amr
- Mohamed Khaled

## Installation

In [2]:
#!pip install -q imageio
#!pip install -q opencv-python
#!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


## Imports

In [3]:
# @title Import the necessary modules
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3

## Data Loading

In [4]:
import pandas as pd

# Helper functions for the UCF101 dataset
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
_VIDEO_LIST = None
_CACHE_DIR = tempfile.mkdtemp()
unverified_context = ssl._create_unverified_context()

def list_ucf_videos():
    """Lists videos available in UCF101 dataset."""
    global _VIDEO_LIST
    if not _VIDEO_LIST:
        index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode("utf-8")
        videos = re.findall("(v_[\w_]+\.avi)", index)
        _VIDEO_LIST = sorted(set(videos))
    return list(_VIDEO_LIST)

def fetch_ucf_video(video):
    """Fetches a video and cache into local filesystem."""
    cache_path = os.path.join(_CACHE_DIR, video)
    if not os.path.exists(cache_path):
        urlpath = request.urljoin(UCF_ROOT, video)
        print("Fetching %s => %s" % (urlpath, cache_path))
        data = request.urlopen(urlpath, context=unverified_context).read()
        open(cache_path, "wb").write(data)
    return cache_path

def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]

def load_video(path, max_frames=0, resize=(224, 224)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if max_frames != 0 and len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames) / 255.0

def to_gif(images):
    converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
    imageio.mimsave('./animation.gif', converted_images, duration=40)
    return embed.embed_file('./animation.gif')

# Define a function to create DataFrame with video paths and labels
def create_dataframe(num_videos=300):
    video_paths = []
    labels = []

    # List all UCF101 videos
    ucf_videos = list_ucf_videos()

    # Randomly select videos
    random_videos = random.sample(ucf_videos, num_videos)

    # Extract labels from video filenames
    for video in random_videos:
        label = video.split('_')[1]
        video_paths.append(fetch_ucf_video(video))
        labels.append(label)

    # Create DataFrame
    df = pd.DataFrame({'video_paths': video_paths, 'labels': labels})
    return df

# Create DataFrame with video paths and labels
df = create_dataframe()

# # Display the DataFrame
# print(df.head())


Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_PlayingGuitar_g01_c05.avi => /tmp/tmpqbgn4qgc/v_PlayingGuitar_g01_c05.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_Knitting_g23_c04.avi => /tmp/tmpqbgn4qgc/v_Knitting_g23_c04.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_CricketShot_g21_c05.avi => /tmp/tmpqbgn4qgc/v_CricketShot_g21_c05.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyEyeMakeup_g10_c04.avi => /tmp/tmpqbgn4qgc/v_ApplyEyeMakeup_g10_c04.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_Archery_g18_c03.avi => /tmp/tmpqbgn4qgc/v_Archery_g18_c03.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ShavingBeard_g14_c05.avi => /tmp/tmpqbgn4qgc/v_ShavingBeard_g14_c05.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_UnevenBars_g22_c01.avi => /tmp/tmpqbgn4qgc/v_UnevenBars_g22_c01.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_UnevenBars_g01_c03.avi => /tmp/tmpqbgn4qgc

In [5]:
import pandas as pd
# Save the DataFrame to a CSV file
df.to_csv('ucf101_videos_labels.csv', index=False)
#df = pd.read_csv('ucf101_videos_labels.csv')

In [5]:
df.head()

Unnamed: 0,video_paths,labels
0,/tmp/tmpcioi6mai/v_BodyWeightSquats_g03_c01.avi,BodyWeightSquats
1,/tmp/tmpcioi6mai/v_FrisbeeCatch_g20_c02.avi,FrisbeeCatch
2,/tmp/tmpcioi6mai/v_Rafting_g08_c06.avi,Rafting
3,/tmp/tmpcioi6mai/v_BenchPress_g14_c02.avi,BenchPress
4,/tmp/tmpcioi6mai/v_HandstandWalking_g14_c03.avi,HandstandWalking
5,/tmp/tmpcioi6mai/v_ApplyEyeMakeup_g02_c03.avi,ApplyEyeMakeup
6,/tmp/tmpcioi6mai/v_RockClimbingIndoor_g07_c02.avi,RockClimbingIndoor
7,/tmp/tmpcioi6mai/v_Fencing_g14_c02.avi,Fencing
8,/tmp/tmpcioi6mai/v_WalkingWithDog_g08_c03.avi,WalkingWithDog
9,/tmp/tmpcioi6mai/v_PommelHorse_g14_c01.avi,PommelHorse


In [6]:
df['video_paths'][0]

'/tmp/tmpcioi6mai/v_BodyWeightSquats_g03_c01.avi'

### Load Video as GIF

Create `/GIFs` dir

In [6]:
frames_clip = [] # a '2d' array where each element is a group of frames corresponding to one video
for i in range(0, 20):
    # Load the first video from the DataFrame
    video_path = df['video_paths'][i]
    video = load_video(video_path)
    converted_video = np.clip(video*255, 0, 255).astype(np.uint8)
    frames_clip.append(converted_video)
    print("finished video: ", i)

# extract labels from the dataframe
labels = df['labels'].values


finished video:  0
finished video:  1
finished video:  2
finished video:  3
finished video:  4
finished video:  5
finished video:  6
finished video:  7
finished video:  8
finished video:  9
finished video:  10
finished video:  11
finished video:  12
finished video:  13
finished video:  14
finished video:  15
finished video:  16
finished video:  17
finished video:  18
finished video:  19


In [7]:
from tensorflow.keras.applications.inception_v3 import preprocess_input

preprocessed = []
for i in range(0, 20):
    preprocessed.append(preprocess_input(frames_clip[i]))
    print("finished video: ", i)

finished video:  0
finished video:  1
finished video:  2
finished video:  3
finished video:  4
finished video:  5
finished video:  6
finished video:  7
finished video:  8
finished video:  9
finished video:  10
finished video:  11
finished video:  12
finished video:  13
finished video:  14
finished video:  15
finished video:  16
finished video:  17
finished video:  18
finished video:  19


## Preprocessing

- CNN (InceptionV3 Model)
    1. Image Size should be 299*299 (only if we're using the full model)

- RNN
    1. LTSM

### CNN

In [None]:
# Load the InceptionV3 model from TensorFlow Hub
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4", trainable=False
)

In [11]:
# Function to extract features using InceptionV3
from tensorflow.keras.applications import InceptionV3

def extract_video_features(video_frames):
    base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
    video_features = []
    for video in video_frames:
        video_features.append(base_model.predict(video))
    return video_features

video_features = extract_video_features(preprocessed)



In [13]:
video_features[0]

array([[0.18401676, 0.05531062, 0.0089295 , ..., 0.9672409 , 0.368906  ,
        0.0431524 ],
       [0.11820316, 0.05077817, 0.02376477, ..., 0.8598074 , 0.43907297,
        0.        ],
       [0.13875324, 0.02897449, 0.07436821, ..., 1.0457786 , 0.3525169 ,
        0.11626944],
       ...,
       [0.12565708, 0.0431479 , 0.02201115, ..., 1.2463803 , 0.20959705,
        0.02850721],
       [0.12721728, 0.04967622, 0.0322283 , ..., 1.3261371 , 0.38342994,
        0.021658  ],
       [0.12732896, 0.05815209, 0.02572929, ..., 1.3750119 , 0.35216844,
        0.02668826]], dtype=float32)

In [14]:
split_index = int(0.8 * len(video_features))
train, test = video_features[:split_index], video_features[split_index:]
split = int(0.8*len(train))
train_features, validate_features = video_features[:split], video_features[split:]
train_labels, validate_labels = labels[:split], labels[split:]

In [25]:
max = 0
for i in range(0, len(train_features)):
  if train_features[i].shape[1] > max:
    max = train_features[i].shape[1]


In [32]:
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.optimizers import Adam

# RNN model creation
rnn_model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(max, 2048)),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(labels), activation='softmax')
])

rnn_model.compile(optimizer=Adam(lr=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

rnn_model.fit(train_features, train_labels, validation_data=(validate_features, validate_labels), epochs=4)

ValueError: Data cardinality is ambiguous:
  x sizes: 238, 180, 56, 124, 199, 241, 241, 153, 125, 75, 101, 72
  y sizes: 12
Make sure all arrays contain the same number of samples.