In [None]:
import numpy as np
import pandas as pd 
import os
import cv2
import tensorflow as tf
from tqdm import tqdm
import pickle
import joblib
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         #print(os.path.join(dirname, filename))
#         pass

In [None]:
# --- Frame extraction -------------------------------------------------------
# For every video under <video_dataset_path>/<action>/ sample SEQUENCE_LENGTH
# equally spaced frames, resize them and store them as JPEGs under
# <output_sequence_path>/<action>/<video name without extension>/frame_XXXX.jpg
SEQUENCE_LENGTH = 20      # number of frames sampled from each video
IMG_SIZE = (224, 224)     # (width, height) as expected by cv2.resize

video_dataset_path = "/kaggle/input/data-set-updated/data set updated"
output_sequence_path = "/kaggle/working/sequence_dataset"

# File extensions (lower-case) that are treated as videos
VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.mkv')

# Create output folder structure
os.makedirs(output_sequence_path, exist_ok=True)

# Process each action category
for action in os.listdir(video_dataset_path):
    action_path = os.path.join(video_dataset_path, action)
    if not os.path.isdir(action_path):
        continue

    # Create action directory in output
    output_action_path = os.path.join(output_sequence_path, action)
    os.makedirs(output_action_path, exist_ok=True)

    # Process each video
    for video_name in tqdm(os.listdir(action_path), desc=f"Processing {action}"):
        # Skip non-video files
        if not video_name.lower().endswith(VIDEO_EXTENSIONS):
            continue

        video_path = os.path.join(action_path, video_name)
        cap = cv2.VideoCapture(video_path)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if cap.isOpened() else 0
            if total_frames <= 0:
                # An unreadable video would otherwise become a folder of black
                # frames that is still labelled with this action.
                tqdm.write(f"Skipping unreadable video: {video_path}")
                continue

            # Create video-specific folder for frames
            video_folder = os.path.join(output_action_path, os.path.splitext(video_name)[0])
            os.makedirs(video_folder, exist_ok=True)

            # Equally spaced frame indices covering the whole clip
            frame_indices = np.linspace(0, total_frames - 1, SEQUENCE_LENGTH, dtype=int)

            for idx, frame_idx in enumerate(frame_indices):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
                ret, frame = cap.read()
                if ret:
                    # Keep the frame in BGR: cv2.imwrite expects BGR input, so
                    # converting to RGB here would store swapped red/blue channels.
                    frame = cv2.resize(frame, IMG_SIZE)
                else:
                    # Handle missing frames with a black image; array shape is
                    # (height, width, channels) while IMG_SIZE is (width, height).
                    frame = np.zeros((IMG_SIZE[1], IMG_SIZE[0], 3), dtype=np.uint8)
                cv2.imwrite(os.path.join(video_folder, f"frame_{idx:04d}.jpg"), frame)
        finally:
            # Always free the decoder, even if a frame fails to process
            cap.release()

In [None]:
import tensorflow as tf

class SequenceGenerator(tf.keras.utils.Sequence):
    """Keras data generator yielding batches of frame sequences and one-hot labels.

    The dataset directory is expected to contain one sub-directory per class,
    each holding one sub-directory per video with that video's ``.jpg`` frames.

    Args:
        dataset_path: Root directory with one folder per class.
        batch_size: Number of videos per batch.
        shuffle: If True, the sample order is shuffled at construction time
            and whenever ``on_epoch_end`` is called.
        **kwargs: Forwarded to the Keras base class constructor.

    Attributes:
        classes: Sorted list of class folder names.
        class_to_idx: Mapping from class name to integer label.
        samples: List of ``(video_folder_path, label)`` tuples.
    """

    def __init__(self, dataset_path, batch_size=8, shuffle=True, **kwargs):
        # Keras 3 warns when a PyDataset subclass skips the base constructor.
        super().__init__(**kwargs)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.classes = sorted([d for d in os.listdir(dataset_path)
                               if os.path.isdir(os.path.join(dataset_path, d))])
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        self.samples = self._prepare_samples(dataset_path)
        self.on_epoch_end()

    def _prepare_samples(self, dataset_path):
        """Return a list of ``(video_folder_path, label)`` for every video folder."""
        samples = []
        for class_name in self.classes:
            class_path = os.path.join(dataset_path, class_name)
            for video_folder in os.listdir(class_path):
                video_path = os.path.join(class_path, video_folder)
                if os.path.isdir(video_path):
                    samples.append((video_path, self.class_to_idx[class_name]))
        return samples

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.samples) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``."""
        batch_samples = self.samples[index*self.batch_size:(index+1)*self.batch_size]
        X, y = self._load_batch(batch_samples)
        return X, y

    def _load_sequence(self, video_path):
        """Load up to SEQUENCE_LENGTH frames of one video as float32 RGB in [0, 1].

        Unreadable frames are replaced by all-zero frames and short sequences are
        padded with all-zero frames so the result always has SEQUENCE_LENGTH items.
        """
        frame_files = sorted(f for f in os.listdir(video_path) if f.endswith('.jpg'))

        frames = []
        for frame_file in frame_files[:SEQUENCE_LENGTH]:
            img = cv2.imread(os.path.join(video_path, frame_file))
            if img is None:
                # cv2.imread signals a corrupt/unreadable file with None;
                # remember the slot so the temporal position is preserved.
                frames.append(None)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            frames.append(img.astype(np.float32) / 255.0)

        # Shape of the filler frame: copy a readable frame if there is one,
        # otherwise fall back to the configured image size (width, height).
        template = next((f for f in frames if f is not None), None)
        if template is None:
            blank = np.zeros((IMG_SIZE[1], IMG_SIZE[0], 3), dtype=np.float32)
        else:
            blank = np.zeros_like(template)

        frames = [blank if f is None else f for f in frames]
        # Pad if necessary (a non-positive count adds nothing)
        frames.extend([blank] * (SEQUENCE_LENGTH - len(frames)))
        return frames

    def _load_batch(self, batch_samples):
        """Load the frames of every sample and one-hot encode the labels."""
        batch_sequences = []
        batch_labels = []

        for video_path, label in batch_samples:
            batch_sequences.append(self._load_sequence(video_path))
            batch_labels.append(label)

        X = np.asarray(batch_sequences, dtype=np.float32)
        y = tf.keras.utils.to_categorical(batch_labels, num_classes=len(self.classes))
        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.samples)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam

def build_hybrid_model(input_shape, num_classes):
    """Build and compile a TimeDistributed-CNN + LSTM video classifier.

    Args:
        input_shape: Shape of one sample without the batch axis, passed
            unchanged to the model's input layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam, lr=1e-4, categorical
        cross-entropy loss, accuracy metric).
    """
    # An explicit Input layer replaces the `input_shape=` layer argument,
    # which Keras 3 flags as deprecated for Sequential models.
    stack = [tf.keras.Input(shape=input_shape)]

    # Three TimeDistributed CNN blocks: Conv -> BatchNorm -> MaxPool,
    # applied independently to every time step.
    for filters in (32, 64, 128):
        stack.extend([
            TimeDistributed(Conv2D(filters, (3,3), activation='relu', padding='same')),
            TimeDistributed(BatchNormalization()),
            TimeDistributed(MaxPooling2D(2,2)),
        ])

    stack.extend([
        # Flatten each time step's feature maps before the LSTM
        TimeDistributed(Flatten()),

        # LSTM Layer: only the final state is forwarded
        LSTM(64, return_sequences=False),
        Dropout(0.3),

        # Classifier Head
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])

    model = Sequential(stack)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

# Initialize model with parameters
SEQUENCE_LENGTH = 20  # Number of frames per sequence
IMG_SIZE = (224, 224) # Input image dimensions as (width, height)

# Derive the number of classes from the dataset instead of hard-coding it:
# every sub-directory of the raw video dataset is one action class, and the
# generators one-hot encode labels over the same set of class folders.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join(video_dataset_path, entry))
    for entry in os.listdir(video_dataset_path)
)

model = build_hybrid_model(
    # Image arrays are (height, width, channels) while IMG_SIZE is (width, height)
    input_shape=(SEQUENCE_LENGTH, IMG_SIZE[1], IMG_SIZE[0], 3),
    num_classes=NUM_CLASSES
)

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths
output_sequence_path = "/kaggle/working/sequence_dataset"
train_path = os.path.join(output_sequence_path, "train")
val_path = os.path.join(output_sequence_path, "val")

# Create directories
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

# Split dataset (80% train, 20% validation), one class at a time
for class_name in sorted(os.listdir(output_sequence_path)):
    class_path = os.path.join(output_sequence_path, class_name)

    # Skip non-class directories
    if not os.path.isdir(class_path) or class_name in ["train", "val"]:
        continue

    # Create class directories in train/val even when the class ends up empty
    train_class_path = os.path.join(train_path, class_name)
    val_class_path = os.path.join(val_path, class_name)
    os.makedirs(train_class_path, exist_ok=True)
    os.makedirs(val_class_path, exist_ok=True)

    # Get all video folders for this class. Sorting makes the seeded split
    # independent of the arbitrary order returned by os.listdir.
    video_folders = sorted(f for f in os.listdir(class_path)
                           if os.path.isdir(os.path.join(class_path, f)))

    # train_test_split raises when it cannot fill both sides, which happens
    # for 0 or 1 videos (including a re-run of this cell after the folders
    # were already moved). Keep such classes entirely in the training set.
    if len(video_folders) >= 2:
        train_videos, val_videos = train_test_split(video_folders, test_size=0.2, random_state=42)
    else:
        train_videos, val_videos = video_folders, []

    # Move video folders to appropriate directories
    for destination, videos in ((train_class_path, train_videos), (val_class_path, val_videos)):
        for video in videos:
            shutil.move(os.path.join(class_path, video), os.path.join(destination, video))

print("Dataset organized into train/val folders!")

In [None]:
# Build the data generators on top of the train/val folders.
# The training generator keeps the default shuffling; validation order stays fixed.
train_generator = SequenceGenerator(dataset_path=train_path)
val_generator = SequenceGenerator(dataset_path=val_path, shuffle=False)

In [None]:
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import ModelCheckpoint

# Re-create both generators with a batch size of 4.
# Training data is shuffled; validation data keeps a fixed order.
train_dir = os.path.join(output_sequence_path, "train")
train_generator = SequenceGenerator(train_dir, batch_size=4, shuffle=True)

val_dir = os.path.join(output_sequence_path, "val")
val_generator = SequenceGenerator(val_dir, batch_size=4, shuffle=False)

In [None]:
# Directory that receives one full-model checkpoint per epoch
checkpoint_dir = os.path.join('/kaggle/working', "training_checkpoints")
os.makedirs(checkpoint_dir, exist_ok=True)

# File name template; Keras fills in the epoch number and validation accuracy
checkpoint_template = os.path.join(
    checkpoint_dir,
    "epoch_{epoch:02d}_valacc_{val_accuracy:.2f}.keras",
)

# Save the whole model (not just weights) after every epoch, best or not
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_template,
    monitor='val_accuracy',
    mode='max',
    save_best_only=False,
    save_weights_only=False,
    save_freq='epoch',
    verbose=1,
)

In [None]:
# Calculate class weights (based on training data only)
train_labels = np.array([label for _, label in train_generator.samples], dtype=int)
num_train_classes = len(train_generator.classes)

# Per-class sample counts, including classes without any training sample
class_counts = np.bincount(train_labels, minlength=num_train_classes)

# compute_class_weight only accepts classes that actually occur in y
present_classes = np.unique(train_labels)
class_weights = compute_class_weight(
    'balanced',
    classes=present_classes,
    y=train_labels
)

# Map each weight to its real class index. enumerate() would shift the
# weights onto the wrong classes whenever a class has no training sample.
# Absent classes get a neutral weight so the keys stay contiguous from 0.
class_weights_dict = {class_idx: 1.0 for class_idx in range(num_train_classes)}
class_weights_dict.update(
    {int(class_idx): float(weight) for class_idx, weight in zip(present_classes, class_weights)}
)

# Train on the first GPU, saving a checkpoint after every epoch
with tf.device('/GPU:0'):
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=10,
        class_weight=class_weights_dict,
        callbacks=[checkpoint_callback]
    )

In [None]:
from sklearn.metrics import classification_report

# Training curves: accuracy on the left panel, loss on the right panel
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

metrics = history.history

acc_ax.plot(metrics['accuracy'], label='Train Accuracy')
acc_ax.plot(metrics['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Model Accuracy')
acc_ax.legend()

loss_ax.plot(metrics['loss'], label='Train Loss')
loss_ax.plot(metrics['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()

plt.show()

# Generate predictions for the whole validation set, batch by batch
y_true = []
y_pred = []

for i in range(len(val_generator)):
    X, y = val_generator[i]
    # verbose=0: avoid printing one progress bar per batch
    preds = model.predict(X, verbose=0)
    y_true.extend(np.argmax(y, axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

# Pass the full label set explicitly. Without it, a class that never occurs in
# y_true/y_pred makes classification_report fail (label count != target_names)
# and shrinks the confusion matrix so its tick labels no longer line up.
label_ids = list(range(len(val_generator.classes)))

print(classification_report(y_true, y_pred, labels=label_ids,
                            target_names=val_generator.classes, zero_division=0))

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
sns.heatmap(cm, annot=True, fmt='d', xticklabels=val_generator.classes,
           yticklabels=val_generator.classes)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()