In [11]:
import ast
import random

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras import datasets, applications

In [12]:
# Root folder of the UCF101 dataset on Kaggle (videos plus the train/test split lists).
folder_path = '/kaggle/input/ucf101/'

# classInd.txt holds one "<index> <class name>" pair per line; build name -> index.
class_index = {}
with open(folder_path + 'UCF101TrainTestSplits-RecognitionTask/ucfTrainTestlist/classInd.txt', 'r') as class_indices_file:
    for line in class_indices_file:
        fields = line.split()
        if len(fields) < 2:  # tolerate blank/trailing lines instead of raising IndexError
            continue
        class_index[fields[1]] = int(fields[0])

print(class_index)

{'ApplyEyeMakeup': 1, 'ApplyLipstick': 2, 'Archery': 3, 'BabyCrawling': 4, 'BalanceBeam': 5, 'BandMarching': 6, 'BaseballPitch': 7, 'Basketball': 8, 'BasketballDunk': 9, 'BenchPress': 10, 'Biking': 11, 'Billiards': 12, 'BlowDryHair': 13, 'BlowingCandles': 14, 'BodyWeightSquats': 15, 'Bowling': 16, 'BoxingPunchingBag': 17, 'BoxingSpeedBag': 18, 'BreastStroke': 19, 'BrushingTeeth': 20, 'CleanAndJerk': 21, 'CliffDiving': 22, 'CricketBowling': 23, 'CricketShot': 24, 'CuttingInKitchen': 25, 'Diving': 26, 'Drumming': 27, 'Fencing': 28, 'FieldHockeyPenalty': 29, 'FloorGymnastics': 30, 'FrisbeeCatch': 31, 'FrontCrawl': 32, 'GolfSwing': 33, 'Haircut': 34, 'Hammering': 35, 'HammerThrow': 36, 'HandstandPushups': 37, 'HandstandWalking': 38, 'HeadMassage': 39, 'HighJump': 40, 'HorseRace': 41, 'HorseRiding': 42, 'HulaHoop': 43, 'IceDancing': 44, 'JavelinThrow': 45, 'JugglingBalls': 46, 'JumpingJack': 47, 'JumpRope': 48, 'Kayaking': 49, 'Knitting': 50, 'LongJump': 51, 'Lunges': 52, 'MilitaryParade': 

In [13]:
# Groups of UCF101 class indices. Every inner list is merged into a single target
# label (e.g. [1, 2] combines ApplyEyeMakeup and ApplyLipstick).
req_classes = [[1,2], [8,9], [23,24], [30], [87], [70], [72], [100], [59,60,61,62,63,64,65,66,67], 
               [83],[98], [29], [85], [79], [80], [84], [51], [45], [77], [73], [44]]

# Invert name -> index once instead of scanning every key for each requested index.
# Comprehensions also avoid binding `_` / `__`, which IPython uses for output history.
index_to_name = {index: name for name, index in class_index.items()}
class_names = [[index_to_name[class_id] for class_id in group] for group in req_classes]
print(class_names)


[['ApplyEyeMakeup', 'ApplyLipstick'], ['Basketball', 'BasketballDunk'], ['CricketBowling', 'CricketShot'], ['FloorGymnastics'], ['SumoWrestling'], ['PullUps'], ['PushUps'], ['WritingOnBoard'], ['PlayingCello', 'PlayingDaf', 'PlayingDhol', 'PlayingFlute', 'PlayingGuitar', 'PlayingPiano', 'PlayingSitar', 'PlayingTabla', 'PlayingViolin'], ['SkyDiving'], ['WalkingWithDog'], ['FieldHockeyPenalty'], ['SoccerPenalty'], ['Shotput'], ['SkateBoarding'], ['SoccerJuggling'], ['LongJump'], ['JavelinThrow'], ['SalsaSpin'], ['Rafting'], ['IceDancing']]


In [14]:
# Map every original UCF101 class index to its merged label. Labels are 1-based and
# follow the order of the groups in req_classes; indices in one group share a label.
# (A comprehension keeps the builtin `id` and IPython's `_` from being overwritten.)
label_dict = {
    class_id: label
    for label, group in enumerate(req_classes, start=1)
    for class_id in group
}

print(label_dict)

{1: 1, 2: 1, 8: 2, 9: 2, 23: 3, 24: 3, 30: 4, 87: 5, 70: 6, 72: 7, 100: 8, 59: 9, 60: 9, 61: 9, 62: 9, 63: 9, 64: 9, 65: 9, 66: 9, 67: 9, 83: 10, 98: 11, 29: 12, 85: 13, 79: 14, 80: 15, 84: 16, 51: 17, 45: 18, 77: 19, 73: 20, 44: 21}


In [15]:
# Only the first official train/test split (trainlist01 / testlist01) is used.

# Flat list of every UCF101 class index that belongs to one of the requested groups.
list_req_class = [i for l in req_classes for i in l]
print(list_req_class)

train_path_list = []
train_y = []
# Each line of trainlist01.txt is "<relative video path> <class index>".
with open(folder_path + "UCF101TrainTestSplits-RecognitionTask/ucfTrainTestlist/trainlist01.txt", "r") as train_path_file:
    for line in train_path_file:
        if not line.strip():  # skip blank lines instead of failing on the unpack below
            continue
        path, class_num = line.split()
        class_num = int(class_num)
        # label_dict has exactly the indices in list_req_class as keys (O(1) lookup).
        if class_num in label_dict:
            train_path_list.append(path)
            train_y.append(label_dict[class_num])


[1, 2, 8, 9, 23, 24, 30, 87, 70, 72, 100, 59, 60, 61, 62, 63, 64, 65, 66, 67, 83, 98, 29, 85, 79, 80, 84, 51, 45, 77, 73, 44]


In [16]:
# Number of training videos kept after filtering to the requested classes.
len(train_path_list)

3035

In [17]:
test_path_list = []
test_y = []
# testlist01.txt lists "<ClassName>/<video file>" with no label column, so the class
# is recovered from the directory part of each path.
with open(folder_path + "UCF101TrainTestSplits-RecognitionTask/ucfTrainTestlist/testlist01.txt", "r") as test_path_file:
    for line in test_path_file:
        path = line.strip()  # also drops a trailing '\r' from CRLF files
        if not path:
            continue
        class_name = path.split('/')[0]
        class_id = class_index[class_name]
        # label_dict has exactly the requested class indices as keys.
        if class_id in label_dict:
            test_path_list.append(path)
            test_y.append(label_dict[class_id])



In [18]:
# Number of test videos kept after filtering to the requested classes.
len(test_path_list)

1214

In [19]:
# One-hot encode the merged labels with pandas' get_dummies.
# The category list is pinned to 1..len(req_classes) so that train and test always get
# the same columns in the same order, even if a class has no video in one split
# (encoding each split independently would silently drop that column).
label_categories = list(range(1, len(req_classes) + 1))
train_y_hot = pd.get_dummies(pd.Categorical(train_y, categories=label_categories))
test_y_hot = pd.get_dummies(pd.Categorical(test_y, categories=label_categories))

In [20]:
labels = np.unique(np.array(train_y))

# Every entry of the significant-frame files is keyed by the absolute video path.
_VIDEO_ROOT = '/kaggle/input/ucf101/UCF101/UCF-101/'


def _read_sig_frames(file_path, target):
    """Parse one significant-frames file into ``target``.

    Each non-empty line is ``<relative video path> <Python literal>``; the literal
    (e.g. a list of frame indices) is stored under the absolute video path.
    """
    with open(file_path, 'r') as readfile:
        for line in readfile.read().splitlines():
            if not line.strip():
                continue
            rel_path, frame_literal = line.split(" ", 1)
            # literal_eval only accepts Python literals, so file contents can never
            # execute arbitrary code the way eval() could.
            target[_VIDEO_ROOT + rel_path] = ast.literal_eval(frame_literal)


# video path -> significant frame indices, for both the train and the test videos.
sig_frame_dict = {}
for i in labels:
    _read_sig_frames('/kaggle/input/train-hist-frames/'+ 'class_'+ str(i) +'_sig_frames.txt', sig_frame_dict)
    _read_sig_frames('/kaggle/input/test-hist-frames/'+ 'test_class_'+ str(i) +'_sig_frames.txt', sig_frame_dict)
    


In [21]:
# Spot-check: significant frame indices recorded for one ApplyEyeMakeup video.
sig_frame_dict['/kaggle/input/ucf101/UCF101/UCF-101/ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi']

[0, 1, 4, 6, 7, 12, 16, 22, 28, 33, 37, 42, 44, 48, 53, 58, 64, 72, 76, 84]

## **Data loader**

### Supports skip-interval, random, and adaptive frame selection

In [12]:

# cnn_base = keras.applications.resnet.ResNet50(weights="imagenet", include_top=False, input_shape= (224,224,3))

# cnn_out = keras.layers.GlobalMaxPool2D()(cnn_base)
# cnn = keras.Model(inputs=cnn_base.input, outputs=cnn_out)
# cnn.trainable = False

In [13]:
# cnn.summary()

In [22]:
_FRAME_SELECTION_MODES = ('skip', 'random', 'hist_difference')


def _read_all_frames(video_path):
    """Decode every frame of ``video_path`` into a list, always releasing the capture."""
    capture = cv2.VideoCapture(video_path)
    frames = []
    try:
        while capture.isOpened():
            ret, frame = capture.read()
            if not ret:  # reached end of video (or nothing could be decoded)
                break
            frames.append(frame)
    finally:
        capture.release()
    return frames


def _choose_frame_indices(video_path, total_frames, frame_selection, num_frames):
    """Return ``num_frames`` ascending frame indices, each in ``[0, total_frames)``."""
    if frame_selection == 'skip':
        interval = total_frames // num_frames
        if interval >= 1:
            return [k * interval for k in range(num_frames)]
        # Fewer frames than requested: spread the indices evenly, repeating frames,
        # instead of dividing by a zero interval.
        return [int(i) for i in np.linspace(0, total_frames - 1, num_frames)]

    if frame_selection == 'random':
        if total_frames >= num_frames:
            return sorted(random.sample(range(total_frames), num_frames))
        # Not enough distinct frames: sample with replacement.
        return sorted(random.randrange(total_frames) for _ in range(num_frames))

    # 'hist_difference': start from the precomputed significant frames of this video.
    significant = sorted(int(i) for i in sig_frame_dict[video_path])
    if len(significant) > num_frames:
        # Too many candidates: keep an evenly spaced subset so the output length is fixed.
        significant = [significant[int(p)] for p in np.linspace(0, len(significant) - 1, num_frames)]
    indices = []
    for idx in significant:
        if not 0 <= idx < total_frames:
            # Recorded index lies outside the decoded video: substitute a random frame.
            idx = int(np.random.randint(0, total_frames))
        indices.append(idx)
    shortfall = num_frames - len(indices)
    if shortfall > 0:
        indices.extend(int(i) for i in np.random.randint(0, total_frames, shortfall))
    # Always return temporal order (substituted/padded indices may be out of order).
    return sorted(indices)


def generate_frames_from_videos(video_path, frame_selection = 'skip', num_frames = 20, frame_size = (224, 224)):
    """Return ``num_frames`` resized frames of a video, chosen by ``frame_selection``.

    All modes return raw resized frames. The previous 'skip' path ran each frame
    through a global ``cnn`` model whose definition is commented out in this notebook,
    so it could not run on a fresh kernel and did not match the other modes.

    Parameters
    ----------
    video_path : str
        Path of the video file. For ``'hist_difference'`` it must also be a key of the
        module-level ``sig_frame_dict``.
    frame_selection : {'skip', 'random', 'hist_difference'}
        ``'skip'``: indices ``0, k, 2k, ...`` with ``k = total_frames // num_frames``.
        ``'random'``: randomly sampled indices, returned in temporal order.
        ``'hist_difference'``: indices stored in ``sig_frame_dict[video_path]``;
        out-of-range entries are replaced and missing ones padded with random frames.
    num_frames : int, default 20
        Number of frames to return.
    frame_size : tuple, default (224, 224)
        Size passed to ``cv2.resize``.

    Returns
    -------
    list of numpy.ndarray
        Exactly ``num_frames`` frames as produced by ``cv2.resize``.
        NOTE(review): OpenCV decodes frames in BGR channel order by default -- confirm
        that the downstream backbones are meant to receive BGR input.

    Raises
    ------
    ValueError
        If ``frame_selection`` is unknown, ``num_frames`` < 1, or no frame can be read.
    KeyError
        If ``frame_selection`` is ``'hist_difference'`` and ``video_path`` is not in
        ``sig_frame_dict``.
    """
    if frame_selection not in _FRAME_SELECTION_MODES:
        raise ValueError("frame_selection must be one of {}, got {!r}".format(
            _FRAME_SELECTION_MODES, frame_selection))
    if num_frames < 1:
        raise ValueError("num_frames must be >= 1, got {!r}".format(num_frames))

    frames_total = _read_all_frames(video_path)
    if not frames_total:
        raise ValueError("no frames could be read from {!r}".format(video_path))

    indices = _choose_frame_indices(video_path, len(frames_total), frame_selection, num_frames)
    return [cv2.resize(frames_total[i], frame_size) for i in indices]
            

In [23]:
# from sklearn.utils import shuffle
# train_path,train_labels=shuffle(train_path,train_labels, random_state=42)

In [24]:
# Sanity check: extract frames from one SalsaSpin video in 'hist_difference' mode
# and display the shape of the stacked result.
z = generate_frames_from_videos('/kaggle/input/ucf101/UCF101/UCF-101/SalsaSpin/v_SalsaSpin_g14_c03.avi', 'hist_difference')
np.array(z).shape

(20, 224, 224, 3)

In [25]:
folder_path = '/kaggle/input/ucf101/'

# Smoke test: run the frame extractor on the second training video, building the
# absolute path the same way the data generator does.
frame_arr = generate_frames_from_videos(
    '{}UCF101/UCF-101/{}'.format(folder_path, train_path_list[1]),
    'hist_difference',
)

In [26]:
# Shape of the stacked frames extracted in the previous cell.
np.array(frame_arr).shape

(20, 224, 224, 3)

In [27]:
import os

In [28]:
def _to_text(value):
    """Return ``value`` as ``str``, decoding UTF-8 when it arrives as bytes."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return str(value)


def video_data_generator(X=train_path_list, Y=train_y_hot, frame_selection = 'hist_difference'):
    """Yield ``(frames, one_hot_label)`` pairs for every video in ``X``, in random order.

    Works both when called directly (``str`` paths, DataFrame or array labels) and when
    driven by ``tf.data.Dataset.from_generator``, which hands string arguments over as
    bytes.

    Parameters
    ----------
    X : sequence of str or bytes
        Video paths relative to ``/kaggle/input/ucf101/UCF101/UCF-101/``.
    Y : array-like of shape (len(X), n_classes)
        One-hot labels; row ``i`` belongs to ``X[i]``.
    frame_selection : str or bytes
        Mode forwarded to ``generate_frames_from_videos``.

    Yields
    ------
    (numpy.ndarray, numpy.ndarray)
        The stacked frames of one video and its one-hot label row.
    """
    paths = [_to_text(p) for p in X]
    # np.asarray turns a DataFrame into a 2-D array so that labels[i] is the i-th ROW
    # (indexing a DataFrame with [i] would select a column instead).
    labels = np.asarray(Y)
    mode = _to_text(frame_selection)

    folder_path = '/kaggle/input/ucf101/'
    # A fresh permutation on every call, i.e. a new order each time the dataset restarts.
    for i in np.random.permutation(len(paths)):
        video_path = os.path.join(folder_path, "UCF101/UCF-101/", paths[i])
        frame_arr = generate_frames_from_videos(video_path, mode)
        yield np.array(frame_arr), labels[i]



### Building the tf.data datasets from the generator (`hist_difference` frame selection)

In [29]:
# from_generator hands string arguments to the generator as bytes, which is why
# video_data_generator decodes its path and frame_selection arguments.

def _make_video_dataset(paths, one_hot_labels, frame_selection='hist_difference', batch_size=32):
    """Build a batched, prefetched dataset of (frames, one-hot label) pairs.

    Train and test share this one definition so their element signatures cannot drift
    apart (previously the label dtype was int16 for train and float32 for test).
    Frames are declared float32 rather than float64 to halve the memory of each batch.
    """
    return tf.data.Dataset.from_generator(
        video_data_generator,
        args=(paths, one_hot_labels, frame_selection),
        output_signature=(
            tf.TensorSpec(shape=(20, 224, 224, 3), dtype=tf.float32),
            # One label slot per merged class instead of a hard-coded 21.
            tf.TensorSpec(shape=(len(req_classes),), dtype=tf.float32),
        ),
    ).batch(batch_size=batch_size).prefetch(tf.data.AUTOTUNE)


dataset_train = _make_video_dataset(train_path_list, train_y_hot)
dataset_test = _make_video_dataset(test_path_list, test_y_hot)

In [19]:
# Spot-check: significant frame indices recorded for one Basketball video.
sig_frame_dict[folder_path+ 'UCF101/UCF-101/' + 'Basketball/v_Basketball_g09_c01.avi']

[0, 12, 24, 36, 37, 40, 43, 45, 47, 48, 56, 60, 65, 68, 72, 73, 75, 78, 80, 81]

## Functional API

## Resnet50

In [19]:
# Frozen ImageNet ResNet50 backbone (no classification head) used as a per-frame
# feature extractor.
# NOTE(review): no `resnet.preprocess_input` step is applied to the frames before the
# backbone -- confirm this is intended.
resnet_pt_model = tf.keras.applications.resnet.ResNet50(
    weights='imagenet',
    include_top=False,
)
resnet_pt_model.trainable = False

# Backbone on each of the 20 frames -> global max pool per frame -> LSTM over the
# frame sequence -> softmax over the merged classes.
model_pret_x = Sequential([
    TimeDistributed(resnet_pt_model, input_shape=(20, 224, 224, 3)),
    TimeDistributed(GlobalMaxPool2D()),
    LSTM(128),
    # Dense(128, activation = 'sigmoid'),
    # Dense(64, activation = 'sigmoid'),
    Dense(len(req_classes), activation='softmax'),
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [17]:


# frame_features_input = keras.layers.Input(shape = (20,100352))

# encoded_sequence = keras.layers.LSTM(128, input_shape = (20,100352), return_sequences = True)(frame_features_input)

# td_dense_layer = keras.layers.TimeDistributed(keras.layers.Dense(32, activation = 'relu'))(encoded_sequence)

# gmp_layer = keras.layers.GlobalMaxPool1D()(td_dense_layer)

# outputs = keras.layers.Dense(len(req_classes), activation="softmax")(gmp_layer)

# model_resnet_lstm_ = keras.Model(frame_features_input, outputs)

In [20]:
# Early-stopping callback monitoring `val_loss` (lower is better) with a patience of
# 5 epochs, configured to restore the best weights.
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min', restore_best_weights = True)

In [36]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output;
# the optimizer is SGD with its default settings.
model_pret_x.compile(loss = 'categorical_crossentropy', optimizer = keras.optimizers.SGD()
                     , metrics = ["accuracy"])

In [37]:
# Layer-by-layer overview of the ResNet50 + LSTM model.
model_pret_x.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 20, 7, 7, 2048)   23587712  
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 20, 2048)         0         
 tributed)                                                       
                                                                 
 lstm (LSTM)                 (None, 128)               1114624   
                                                                 
 dense (Dense)               (None, 21)                2709      
                                                                 
Total params: 24,705,045
Trainable params: 1,117,333
Non-trainable params: 23,587,712
_________________________________________________________________


In [102]:
# Sanity check: extract frames from one PlayingSitar video in 'hist_difference' mode.
a=generate_frames_from_videos("/kaggle/input/ucf101/UCF101/UCF-101/PlayingSitar/v_PlayingSitar_g09_c07.avi", 'hist_difference')

In [103]:
# Shape of the stacked frames extracted in the previous cell.
np.array(a).shape

(20, 224, 224, 3)

In [122]:
# Train for 3 epochs, validating on the test split after each epoch.
# NOTE(review): `early_stopping_callback` is defined in this notebook but is not
# passed via `callbacks=`, so it has no effect on this run.
hist_pret_x = model_pret_x.fit(dataset_train, epochs=3,validation_data=dataset_test)

Epoch 1/3
Epoch 2/3
Epoch 3/3


# EfficientNetV2L

In [24]:
# Frozen ImageNet EfficientNetV2L backbone (no classification head) used as a
# per-frame feature extractor.
eff_net_ptmodel = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
    weights='imagenet',
    include_top=False,
)
eff_net_ptmodel.trainable = False

# Same head as the ResNet variant: backbone on each of the 20 frames -> global max
# pool per frame -> LSTM over the frame sequence -> softmax over the merged classes.
# This rebinds `model_pret_x`.
model_pret_x = Sequential([
    TimeDistributed(eff_net_ptmodel, input_shape=(20, 224, 224, 3)),
    TimeDistributed(GlobalMaxPool2D()),
    LSTM(128),
    # Dense(128, activation = 'sigmoid'),
    # Dense(64, activation = 'sigmoid'),
    Dense(len(req_classes), activation='softmax'),
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-l_notop.h5


In [17]:


# frame_features_input = keras.layers.Input(shape = (20,100352))

# encoded_sequence = keras.layers.LSTM(128, input_shape = (20,100352), return_sequences = True)(frame_features_input)

# td_dense_layer = keras.layers.TimeDistributed(keras.layers.Dense(32, activation = 'relu'))(encoded_sequence)

# gmp_layer = keras.layers.GlobalMaxPool1D()(td_dense_layer)

# outputs = keras.layers.Dense(len(req_classes), activation="softmax")(gmp_layer)

# model_resnet_lstm_ = keras.Model(frame_features_input, outputs)

In [25]:
# Early-stopping callback monitoring `val_loss` (lower is better) with a patience of
# 5 epochs, configured to restore the best weights.
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min', restore_best_weights = True)

In [26]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output;
# the optimizer is SGD with its default settings.
model_pret_x.compile(loss = 'categorical_crossentropy', optimizer = keras.optimizers.SGD()
                     , metrics = ["accuracy"])

In [27]:
# Layer-by-layer overview of the EfficientNetV2L + LSTM model.
model_pret_x.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 20, 7, 7, 1280)   117746848 
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 20, 1280)         0         
 tributed)                                                       
                                                                 
 lstm (LSTM)                 (None, 128)               721408    
                                                                 
 dense (Dense)               (None, 21)                2709      
                                                                 
Total params: 118,470,965
Trainable params: 724,117
Non-trainable params: 117,746,848
_________________________________________________________________


In [None]:
# Train for 5 epochs, validating on the test split after each epoch.
# NOTE(review): `early_stopping_callback` is defined in this notebook but is not
# passed via `callbacks=`, so it has no effect on this run.
hist_pret_x = model_pret_x.fit(dataset_train, epochs=5,validation_data=dataset_test)

Epoch 1/5


2023-04-20 20:00:57.189074: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/time_distributed/efficientnetv2-l/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/5
Epoch 3/5
Epoch 4/5

In [60]:
# Load a previously saved model from a Kaggle input dataset. This rebinds
# `model_pret_x`, replacing the model object built earlier in the notebook.
# NOTE(review): judging by the file name this is presumably a checkpoint of the
# EfficientNet + LSTM model -- confirm.
model_pret_x = tf.keras.models.load_model("/kaggle/input/saved-dataset/eff_net_lstm_hist_1.h5")

In [61]:
# Continue training the loaded model for 5 more epochs, validating on the test split.
# NOTE(review): `early_stopping_callback` is defined in this notebook but is not
# passed via `callbacks=`, so it has no effect on this run.
hist_pret_x = model_pret_x.fit(dataset_train, epochs=5,validation_data=dataset_test)

Epoch 1/5


2023-04-21 17:57:16.203827: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/time_distributed/efficientnetv2-l/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Attention

In [38]:
from tensorflow.keras.layers import Input, Dense, Permute, Multiply

In [30]:
# Fresh frozen EfficientNetV2L backbone (ImageNet weights, no classification head)
# for the attention model.
eff_net_ptmodel = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
    weights='imagenet',
    include_top=False,
)
eff_net_ptmodel.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-l_notop.h5


In [34]:

# Per-frame image encoder: the EfficientNetV2L backbone followed by global average
# pooling, wrapped as its own model so it can be applied to every frame.
eff_out = keras.layers.GlobalAveragePooling2D()(eff_net_ptmodel.output)
encoder = keras.Model(inputs = eff_net_ptmodel.input, outputs= eff_out)


# One sample is a clip of 20 frames, each 224x224 with 3 channels.
input_feature = keras.layers.Input(shape = [20, 224, 224, 3])


# Attention over the frame sequence: every frame is encoded, then projected by two
# separate Dense(128) layers whose outputs are fed to the attention layer.
# NOTE(review): per the Keras docs a two-element input to `Attention` is
# [query, value] with the key defaulting to the value, so `key_img` would act as both
# key and value here -- confirm this is intended.
encoded_img =keras.layers.TimeDistributed(encoder)(input_feature)
query_img = keras.layers.TimeDistributed(keras.layers.Dense(128, activation = 'relu'))(encoded_img)
key_img = keras.layers.TimeDistributed(keras.layers.Dense(128, activation = 'relu'))(encoded_img)

att_out = keras.layers.Attention()([query_img, key_img])

# Concatenate the attention output with the raw per-frame encodings.
concat_layer = keras.layers.Concatenate()([att_out, encoded_img])

# A bidirectional LSTM collapses the frame sequence into a single vector.
encoded_sequence = keras.layers.Bidirectional(keras.layers.LSTM(128))(concat_layer)

lstm_dense_td = keras.layers.Dense(64, activation = 'relu')(encoded_sequence)

# Softmax over the merged classes (one unit per group in req_classes).
outputs = keras.layers.Dense(len(req_classes), activation="softmax")(lstm_dense_td)

model_effnet_att_lstm = keras.Model(input_feature, outputs)

In [35]:
# Layer-by-layer overview of the EfficientNetV2L + attention + BiLSTM model.
model_effnet_att_lstm.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 20, 224, 22  0           []                               
                                4, 3)]                                                            
                                                                                                  
 time_distributed_7 (TimeDistri  (None, 20, 1280)    117746848   ['input_4[0][0]']                
 buted)                                                                                           
                                                                                                  
 time_distributed_8 (TimeDistri  (None, 20, 128)     163968      ['time_distributed_7[0][0]']     
 buted)                                                                                     

In [36]:
# Early-stopping callback monitoring `val_loss` (lower is better) with a patience of
# 5 epochs, configured to restore the best weights.
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min', restore_best_weights = True)

In [37]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output;
# the optimizer is Adam with its default settings.
model_effnet_att_lstm.compile(loss = 'categorical_crossentropy', optimizer = keras.optimizers.Adam()
                     , metrics = ["accuracy"])

In [None]:
# Train the attention model for 5 epochs, validating on the test split.
# NOTE(review): `early_stopping_callback` is defined in this notebook but is not
# passed via `callbacks=`, so it has no effect on this run.
model_effnet_att_lstm.fit(dataset_train, epochs=5,validation_data=dataset_test)

Epoch 1/5


2023-04-21 20:10:49.314481: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_3/time_distributed_7/model_2/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/5