In [None]:
%pip install tensorflow mediapipe h5py scipy scikit-learn matplotlib numpy pandas protobuf tqdm datasets

In [21]:
import os
import json
import numpy as np
import logging
import matplotlib.pyplot as plt
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from keras.callbacks import Callback
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import datetime as dt
from tensorflow.keras.callbacks import EarlyStopping

# Set up logging
logging.basicConfig(level=logging.INFO)


In [8]:
# Parameters
root_dir = os.path.join(os.path.dirname(os.getcwd()), "dataset_processing", "archive", "keypoints-rwf-2000")
no_of_timesteps = 20
keypoint_labels = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"
]

# Initialize dataset lists
X = []
y = []

In [9]:
# Custom callback for live plotting
class LivePlotCallback(Callback):
    def on_train_begin(self, logs=None):
        self.losses = []
        self.accuracies = []
        plt.ion()  # Enable interactive mode
        self.fig, self.ax = plt.subplots(1, 2, figsize=(12, 5))
        self.ax[0].set_title("Loss")
        self.ax[1].set_title("Accuracy")

    def on_epoch_end(self, epoch, logs=None):
        self.losses.append(logs["loss"])
        self.accuracies.append(logs["accuracy"])
        
        # Clear and update loss plot
        self.ax[0].cla()
        self.ax[0].plot(self.losses, label="Training Loss", color="blue")
        self.ax[0].set_title("Loss")
        self.ax[0].legend()

        # Clear and update accuracy plot
        self.ax[1].cla()
        self.ax[1].plot(self.accuracies, label="Training Accuracy", color="green")
        self.ax[1].set_title("Accuracy")
        self.ax[1].legend()
        
        plt.pause(0.01)  # Small pause to update the plot
        plt.draw()

    def on_train_end(self, logs=None):
        plt.ioff()
        plt.show()

In [12]:
def load_json_data(json_path, label):
    try:
        with open(json_path) as file:
            data = json.load(file)
            frames_data = []

            if len(data) < no_of_timesteps:
                logging.warning(f"Skipping {json_path} as it has fewer than {no_of_timesteps} frames.")
                return None

            for i in range(no_of_timesteps, len(data)):
                sequence = []
                frames = data[i - no_of_timesteps:i]

                for frame in frames:
                    if frame["detections"]:
                        person = frame["detections"][0]
                        person_keypoints = []
                        
                        keypoints_dict = {kp['label']: kp['coordinates'] for kp in person['keypoints']}
                        
                        for label in keypoint_labels:
                            if label in keypoints_dict:
                                coords = keypoints_dict[label]
                                person_keypoints.extend([coords['x'], coords['y']])
                            else:
                                person_keypoints.extend([0.0, 0.0])
                    else:
                        person_keypoints = [0.0, 0.0] * len(keypoint_labels)
                    
                    sequence.append(person_keypoints)

                frames_data.append(np.array(sequence))
                
            return frames_data

    except Exception as e:
        logging.error(f"Error loading {json_path}: {e}")
        return None

In [13]:
def process_dataset(root_dir):
    global X, y
    for category in ['train', 'val']:
        for label in ['Fight', 'NonFight']:
            category_dir = os.path.join(root_dir, category, label)
            logging.info(f"Processing category '{label}' in '{category}' set...")

            for video_folder in tqdm(os.listdir(category_dir)):
                video_folder_path = os.path.join(category_dir, video_folder)

                if os.path.isdir(video_folder_path):
                    json_path = os.path.join(video_folder_path, f"{video_folder}.json")

                    if os.path.isfile(json_path):
                        sequences = load_json_data(json_path, label)

                        if sequences:
                            X.extend(sequences)
                            y.extend([1 if label == 'Fight' else 0] * len(sequences))


# Load and process dataset
process_dataset(root_dir)

INFO:root:Processing category 'Fight' in 'train' set...
  0%|          | 0/789 [00:00<?, ?it/s]ERROR:root:Error loading c:\Users\gorme\projects\godseye\apps\backend\dataset_processing\archive\keypoints-rwf-2000\train\Fight\-1l5631l3fg_0\-1l5631l3fg_0.json: 'detections'
ERROR:root:Error loading c:\Users\gorme\projects\godseye\apps\backend\dataset_processing\archive\keypoints-rwf-2000\train\Fight\-1l5631l3fg_1\-1l5631l3fg_1.json: 'detections'
ERROR:root:Error loading c:\Users\gorme\projects\godseye\apps\backend\dataset_processing\archive\keypoints-rwf-2000\train\Fight\-1l5631l3fg_2\-1l5631l3fg_2.json: 'detections'
  1%|▏         | 11/789 [00:00<00:50, 15.32it/s]ERROR:root:Error loading c:\Users\gorme\projects\godseye\apps\backend\dataset_processing\archive\keypoints-rwf-2000\train\Fight\0NWz-01A2yk_0\0NWz-01A2yk_0.json: 'detections'
ERROR:root:Error loading c:\Users\gorme\projects\godseye\apps\backend\dataset_processing\archive\keypoints-rwf-2000\train\Fight\0NWz-01A2yk_1\0NWz-01A2yk_1.j

In [14]:
# Convert to numpy arrays with correct shape
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int32)


print("Dataset shapes:")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

if len(X) == 0:
    raise ValueError("No data was loaded. Check the dataset directory and file paths.")

# Normalize the coordinates
mean = np.mean(X.reshape(-1, X.shape[-1]), axis=0)
std = np.std(X.reshape(-1, X.shape[-1]), axis=0)
std = np.where(std == 0, 1, std)
X = (X - mean) / std


Dataset shapes:
X shape: (258180, 20, 34)
y shape: (258180,)


In [15]:
X

array([[[ 0.7377947 ,  3.7373996 ,  0.90387803, ..., -0.8022287 ,
         -0.71034604, -0.80733055],
        [ 3.7173114 ,  2.571439  ,  4.1111116 , ...,  2.1113803 ,
          3.0929332 ,  2.1765988 ],
        [ 3.7132738 ,  2.5828419 ,  4.106408  , ...,  2.0971596 ,
          3.084887  ,  2.1570203 ],
        ...,
        [ 0.6534977 ,  3.2865915 ,  0.8184054 , ...,  2.6056902 ,
          0.62457   ,  2.7528641 ],
        [ 0.6318349 ,  3.1782212 ,  0.7891128 , ...,  2.5791857 ,
          0.61671764,  2.7003205 ],
        [ 0.620937  ,  3.1010623 ,  0.77390504, ...,  2.5156856 ,
          0.6301253 ,  2.6218522 ]],

       [[ 3.7173114 ,  2.571439  ,  4.1111116 , ...,  2.1113803 ,
          3.0929332 ,  2.1765988 ],
        [ 3.7132738 ,  2.5828419 ,  4.106408  , ...,  2.0971596 ,
          3.084887  ,  2.1570203 ],
        [ 0.7288364 ,  3.7744353 ,  0.8916555 , ..., -0.8022287 ,
         -0.71034604, -0.80733055],
        ...,
        [ 0.6318349 ,  3.1782212 ,  0.7891128 , ...,  

In [16]:
y

array([1, 1, 1, ..., 0, 0, 0])

In [17]:
# Perform train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\nTraining set shapes:")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")


Training set shapes:
X_train shape: (206544, 20, 34)
y_train shape: (206544,)


In [22]:
# Model Definition
model = Sequential([
    LSTM(128, input_shape=(20, 34), return_sequences=True),  # Input shape matches X_train
    Dropout(0.3),
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    LSTM(32, return_sequences=False),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Binary output
])

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

print(model.summary())

None


In [25]:
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 10, mode = 'min', restore_best_weights = True)

epochs=60
# Train the model with the LivePlotCallback
history = model.fit(X_train, y_train, epochs=epochs, batch_size=32, validation_data=(X_test, y_test), callbacks = [early_stopping_callback])


model_evaluation_history = model.evaluate(X_test, y_test)

model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history

date_time_format = '%Y_%m_%d__%H_%M_%S'
current_date_time_dt = dt.datetime.now()
current_date_time_string = dt.datetime.strftime(current_date_time_dt, date_time_format)

model_file_name = f'skeletonViolenceLSTM_model___Date_Time_{current_date_time_string}___Loss_{model_evaluation_loss}___Accuracy_{model_evaluation_accuracy}__Epochs_{epochs}.h5'
model_path = os.path.join('models', model_file_name)

model.save(model_path) #! Important

Epoch 1/60
[1m6455/6455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 29ms/step - accuracy: 0.8727 - loss: 0.3037 - val_accuracy: 0.8951 - val_loss: 0.2490
Epoch 2/60
[1m6455/6455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 33ms/step - accuracy: 0.9014 - loss: 0.2420 - val_accuracy: 0.9169 - val_loss: 0.2080
Epoch 3/60
[1m6455/6455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 35ms/step - accuracy: 0.9195 - loss: 0.2029 - val_accuracy: 0.9265 - val_loss: 0.1852
Epoch 4/60
[1m6455/6455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 32ms/step - accuracy: 0.9307 - loss: 0.1781 - val_accuracy: 0.9330 - val_loss: 0.1721
Epoch 5/60
[1m1810/6455[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m3:19[0m 43ms/step - accuracy: 0.9381 - loss: 0.1596