In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import itertools
import h5py
import skvideo.io
#Function for Feature Extraction

def frame_crop_center(video, cropf):
    """Return the temporally centred window of ``cropf`` frames from ``video``.

    Args:
        video: Array whose first axis indexes frames.
        cropf: Number of consecutive frames to keep.

    Returns:
        A slice of ``video`` along its first axis. It holds ``cropf`` frames
        when the clip is long enough; a shorter clip is returned whole, so
        callers that need a fixed length must check ``shape[0]`` themselves.
    """
    num_frames = video.shape[0]
    # Clamp at 0: for clips shorter than ``cropf`` the centred start index is
    # negative, and a negative slice start counts from the END of the clip,
    # which silently returned only its last few frames.
    start = max(num_frames // 2 - cropf // 2, 0)
    return video[start:start + cropf]



In [2]:

def feature_extraction(video_path, frame_size=60, seq_len=5):
    """Sample up to ``seq_len`` evenly spaced frames from a video file.

    Each sampled frame is resized to ``frame_size`` x ``frame_size`` pixels and
    divided by 255. Frames are kept as decoded by ``cv2.VideoCapture``; no
    colour-space conversion is applied.

    Args:
        video_path: Path of the video file to read.
        frame_size: Side length, in pixels, of the square output frames.
        seq_len: Number of frames to sample.

    Returns:
        A list of frame arrays. It holds fewer than ``seq_len`` entries
        (possibly none) when a frame cannot be read, e.g. for an unreadable
        file or a clip with fewer than ``seq_len`` frames.
    """
    if seq_len <= 0:
        # Nothing to sample; also avoids dividing by zero below.
        return []

    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    try:
        frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        # Distance between sampled frames; at least 1 so short clips still
        # advance one frame at a time.
        skip_interval = max(frame_count // seq_len, 1)
        for counter in range(seq_len):
            # Seek to the frame to sample, then decode it.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, counter * skip_interval)
            ret, frame = video_reader.read()
            if not ret:
                break
            # cv2.resize expects dsize as (width, height).
            frame = cv2.resize(frame, (frame_size, frame_size))
            frames_list.append(frame / 255)
    finally:
        # Release the capture handle even if decoding or resizing raised.
        video_reader.release()
    return frames_list

#Function for loading video files, Process and store in a data set
def load_video(datasets, frame_size=60, seq_len=5):
    """Load every video under the given class folders as sampled frame stacks.

    Args:
        datasets: Iterable of folder paths, one folder per category. The
            position of a folder in this iterable becomes the integer label
            of all videos inside it.
        frame_size: Side length passed to ``feature_extraction``.
        seq_len: Number of frames per video passed to ``feature_extraction``.

    Returns:
        A tuple ``(images, labels)``: ``images`` is a float16 array built from
        the per-video frame lists and ``labels`` is an int32 array with one
        entry per video.

    Note:
        ``feature_extraction`` may return fewer than ``seq_len`` frames for a
        video; if the per-video frame counts differ, building the float16
        array fails because the nested lists are ragged.
    """
    labels = []
    images = []
    # Iterate through each folder corresponding to a category.
    for label_index, folder in enumerate(datasets):
        for file in tqdm(os.listdir(folder)):
            video_path = os.path.join(folder, file)
            images.append(feature_extraction(video_path, frame_size, seq_len))
            labels.append(label_index)
    return np.array(images, dtype='float16'), np.array(labels, dtype='int32')


In [3]:


def extract_tarian(path, frame_size=(128, 128), num_frames=42):
    """Read every video under the class folders, centre-crop and resize it.

    Args:
        path: Iterable of folder paths, one folder per class. The position of
            a folder in this iterable becomes the integer label of its videos.
        frame_size: ``(width, height)`` each kept frame is resized to.
        num_frames: Number of consecutive frames kept from the middle of each
            video (via ``frame_crop_center``).

    Returns:
        A tuple ``(list_video, list_label, video_dims)``:
        ``list_video`` holds one array of resized frames per video,
        ``list_label`` the matching integer labels, and ``video_dims`` the
        shape of each video as read from disk, before cropping and resizing.
        Videos shorter than ``num_frames`` yield fewer frames, so the entries
        of ``list_video`` are not guaranteed to share one shape.
    """
    list_video = []
    list_label = []
    video_dims = []
    for label_index, folder in enumerate(path):
        for entry in tqdm(os.listdir(folder)):
            video_file = os.path.join(folder, entry)
            # Only regular files can be decoded; skip sub-directories and
            # other non-file entries instead of failing inside the reader.
            if not os.path.isfile(video_file):
                continue

            video = skvideo.io.vread(video_file)
            video_dims.append(video.shape)

            # Centre-crop along the frame axis first so that only the frames
            # that are kept get resized; resizing is per-frame, so the result
            # is the same as resizing everything and cropping afterwards.
            video = frame_crop_center(video, num_frames)
            video = np.asarray([
                cv2.resize(frame, frame_size, interpolation=cv2.INTER_CUBIC)
                for frame in video
            ])

            list_video.append(video)
            list_label.append(label_index)

    return list_video, list_label, video_dims
# label_data = pd.read_csv("/media/virgantara/DATA1/Penelitian/Datasets/HumanMotionDB/hmdb51_org", sep=' ', header=None)



In [4]:

# Collect one folder per class. The index of a folder in ``path`` is used as
# the integer class label by extract_tarian.
path = []
dir_path = "dataset"
# sorted(): os.listdir returns entries in arbitrary order, which would make
# the folder -> label assignment differ between machines and runs.
# NOTE: sorting may assign different label indices than earlier runs of this
# notebook did; regenerate any saved label files after this change.
for class_dir in sorted(os.listdir(dir_path)):
    class_path = os.path.join(dir_path, class_dir)
    # Ignore stray files next to the class folders.
    if os.path.isdir(class_path):
        path.append(class_path)

print(path[0])
#
list_video, list_label, video_dims = extract_tarian(path)

# The clips do not all share one shape at this point, so report the count
# directly: np.array() on such a ragged list warns on older NumPy and raises
# on newer releases.
print((len(list_video),))
#
# import h5py

# with h5py.File("dataset_tarian.h5", "w") as f:
#     f.create_dataset("images", data=np.asarray(images))
#     f.create_dataset("labels", data=np.asarray(labels))



dataset/saman


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:39<00:00,  1.73it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 133/133 [00:36<00:00,  3.67it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:07<00:00,  3.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:14<00:00,  3.61it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 

(385,)



  print(np.array(list_video).shape)


In [5]:
# Tabulate the on-disk shape of every video (recorded before any cropping or
# resizing) and show summary statistics per dimension.
dim_columns = ['frame_length', 'height', 'width', 'channels']
data = pd.DataFrame(video_dims, columns=dim_columns)
data.describe()

Unnamed: 0,frame_length,height,width,channels
count,385.0,385.0,385.0,385.0
mean,376.350649,383.974026,486.623377,3.0
std,149.383993,170.636216,379.917104,0.0
min,6.0,210.0,190.0,3.0
25%,256.0,300.0,200.0,3.0
50%,500.0,340.0,400.0,3.0
75%,500.0,340.0,400.0,3.0
max,552.0,720.0,1280.0,3.0


In [15]:
# [video for video in surv_fights if video.shape[0] == 42]
# videos, labels = [video, lb for video, lb in zip(list_video, list_label) if video.shape[0] == 42]
# Keep only the clips that contain exactly 42 frames, together with their
# labels, so that all remaining clips share the same number of frames.
kept_pairs = [
    (clip, clip_label)
    for clip, clip_label in zip(list_video, list_label)
    if clip.shape[0] == 42
]
videos = [clip for clip, _ in kept_pairs]
labels = [clip_label for _, clip_label in kept_pairs]

In [16]:
# Sanity check: report the stacked shape of the kept clips and of their labels.
for collection in (videos, labels):
    print(np.array(collection).shape)

(383, 42, 128, 128, 3)
(383,)


In [18]:
import h5py

# Persist the filtered clips and their integer labels as two datasets
# ("videos" and "labels") inside a single HDF5 file.
with h5py.File("dataset_tarian_128w_42seq.h5", "w") as h5_out:
    for dataset_name, values in (("videos", videos), ("labels", labels)):
        h5_out.create_dataset(dataset_name, data=np.asarray(values))

In [19]:
# Convert once, then report the shapes of the arrays that are actually kept.
# Printing np.array(videos).shape before converting would build the whole
# stacked array an extra time just to read its shape.
videos = np.asarray(videos)
labels = np.asarray(labels)
print(videos.shape)
print(labels.shape)

(383, 42, 128, 128, 3)
(383,)


In [20]:
# Hold out 20% of the clips for testing, then split 10% of the remainder off
# as a validation set. Both splits use a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    videos, labels, test_size=0.2, random_state=2334
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=124567
)

for split in (X_train, X_val, X_test):
    print(split.shape)

(275, 42, 128, 128, 3)
(31, 42, 128, 128, 3)
(77, 42, 128, 128, 3)


In [27]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import backend as K

TypeError: Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [26]:

# Setting seed for reproducibility
# NOTE: SEED is not applied anywhere in this cell while the call below stays
# commented out.
SEED = 77
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
# tf.random.set_seed(SEED)

# DATA
# NOTE(review): this name looks like a leftover from a different (fight
# detection) notebook — confirm whether anything still reads it.
DATASET_NAME = "fight/nofights"
BATCH_SIZE = 4
# AUTO = tf.data.AUTOTUNE
# Shape of one clip: (frames, height, width, channels).
INPUT_SHAPE = (42, 128, 128, 3)
# NOTE(review): the loading cell processes more than two class folders, so 2
# looks like a leftover as well — confirm and set this to the number of class
# folders (e.g. len(path)).
NUM_CLASSES = 2

# OPTIMIZER
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5

# TRAINING
EPOCHS = 20

# TUBELET EMBEDDING
PATCH_SIZE = (8, 8, 8)
# Number of non-overlapping patches: one factor per patched axis (frames,
# height, width). Squaring only the frame-axis count is wrong because
# INPUT_SHAPE is not a cube.
NUM_PATCHES = (
    (INPUT_SHAPE[0] // PATCH_SIZE[0])
    * (INPUT_SHAPE[1] // PATCH_SIZE[1])
    * (INPUT_SHAPE[2] // PATCH_SIZE[2])
)

# ViViT ARCHITECTURE
LAYER_NORM_EPS = 1e-6
PROJECTION_DIM = 64
NUM_HEADS = 2
NUM_LAYERS = 2

TypeError: Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [25]:
@tf.function
def preprocess(frames: tf.Tensor, label: tf.Tensor):
    """Convert one clip to float32 and cast its label to float32.

    Args:
        frames: Tensor holding the frames of a single clip.
        label: Label of the clip.

    Returns:
        A tuple ``(frames, label)`` with ``frames`` converted by
        ``tf.image.convert_image_dtype(..., tf.float32)`` and ``label`` cast
        to ``tf.float32``.
    """
    # Conv3D layers need an explicit channel axis. Add one only when the clip
    # has three axes (no channel axis yet): clips that already carry a channel
    # axis would otherwise gain a fifth axis and no longer match INPUT_SHAPE.
    if frames.shape.rank == 3:
        frames = frames[..., tf.newaxis]

    # Preprocess images
    frames = tf.image.convert_image_dtype(frames, tf.float32)

    # Parse label
    label = tf.cast(label, tf.float32)
    return frames, label


def prepare_dataloader(
    videos: np.ndarray,
    labels: np.ndarray,
    loader_type: str = "train",
    batch_size: int = BATCH_SIZE,
):
    """Build a batched, prefetched ``tf.data`` pipeline over (video, label) pairs.

    Args:
        videos: Array of clips; sliced along its first axis.
        labels: Array of labels, one per clip.
        loader_type: ``"train"`` enables shuffling; any other value leaves the
            order unchanged.
        batch_size: Number of clips per batch.

    Returns:
        A ``tf.data.Dataset`` that applies ``preprocess`` to every element,
        batches the result and prefetches batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((videos, labels))

    if loader_type == "train":
        # Size the shuffle buffer from the requested batch size; the global
        # BATCH_SIZE was used here before, ignoring the ``batch_size`` argument.
        dataset = dataset.shuffle(batch_size * 2)

    return (
        dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )


# Build one pipeline per split; only the "train" loader type is shuffled.
trainloader = prepare_dataloader(videos=X_train, labels=y_train, loader_type="train")
validloader = prepare_dataloader(videos=X_val, labels=y_val, loader_type="valid")
testloader = prepare_dataloader(videos=X_test, labels=y_test, loader_type="test")

NameError: name 'tf' is not defined