In [5]:
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import os
import cv2
import random

# Định nghĩa DataGenerator
class VideoDataGenerator(Sequence):
    """Keras ``Sequence`` that yields batches of fixed-length RGB clips read from video files.

    Every item is a tuple ``(X, y)``:

    * ``X`` -- float32 array of shape ``(n, frame_count, frame_size[0], frame_size[1], 3)``
      with pixel values scaled to ``[0, 1]``.
    * ``y`` -- float32 vector holding the ``n`` matching labels.

    ``n`` equals ``batch_size`` except for the final batch, which may be smaller unless
    ``drop_remainder`` is true.

    Args:
        video_paths: Sequence of video file paths.
        labels: Sequence of labels, one per entry of ``video_paths``.
        batch_size: Number of videos per batch.
        frame_count: Number of frames sampled from every video.
        frame_size: ``(height, width)`` of each frame after resizing.
        shuffle: If true, paths and labels are reshuffled together on construction and
            whenever ``on_epoch_end`` is called.
        drop_remainder: If true, a trailing batch smaller than ``batch_size`` is dropped.
            Defaults to False so that every video is used.
        **kwargs: Forwarded unchanged to the base ``Sequence`` constructor.

    Raises:
        ValueError: If ``video_paths`` and ``labels`` differ in length.
    """

    def __init__(self, video_paths, labels, batch_size=4, frame_count=16, frame_size=(224, 224), shuffle=True,
                 drop_remainder=False, **kwargs):
        # Keras 3 expects PyDataset/Sequence subclasses to call the base constructor;
        # with no extra kwargs this is also a harmless no-op on older tf.keras.
        super().__init__(**kwargs)
        if len(video_paths) != len(labels):
            raise ValueError(
                f"video_paths and labels must have the same length, got {len(video_paths)} and {len(labels)}"
            )
        # Private copies: shuffling never reorders the caller's lists.
        self.video_paths = list(video_paths)
        self.labels = list(labels)
        self.batch_size = batch_size
        self.frame_count = frame_count
        self.frame_size = frame_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch."""
        sample_count = len(self.video_paths)
        if self.drop_remainder:
            return sample_count // self.batch_size
        # Ceiling division so the last, possibly smaller, batch is not silently discarded.
        return (sample_count + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        return self.__data_generation(self.video_paths[start:stop], self.labels[start:stop])

    def on_epoch_end(self):
        """Reshuffle paths and labels in unison when shuffling is enabled."""
        # The emptiness guard matters: zip(*[]) yields nothing and cannot be unpacked.
        if self.shuffle and self.video_paths:
            paired = list(zip(self.video_paths, self.labels))
            random.shuffle(paired)
            shuffled_paths, shuffled_labels = zip(*paired)
            self.video_paths = list(shuffled_paths)
            self.labels = list(shuffled_labels)

    def __data_generation(self, batch_video_paths, batch_labels):
        """Load the given videos and assemble them into one ``(X, y)`` batch."""
        # Size the buffer from the actual batch so a short final batch carries no
        # all-zero filler clips.
        X = np.zeros((len(batch_video_paths), self.frame_count, *self.frame_size, 3), dtype=np.float32)
        y = np.array(batch_labels, dtype=np.float32)

        for i, video_path in enumerate(batch_video_paths):
            frames = self.load_video_frames(video_path)
            X[i] = frames / 255.0  # normalise to [0, 1]

        return X, y

    def load_video_frames(self, video_path):
        """Read a video and extract ``frame_count`` frames at a fixed stride.

        Frames are resized to ``frame_size`` and converted from BGR to RGB. If fewer than
        ``frame_count`` frames can be read, the last frame read is repeated; if none can be
        read at all, black frames are used.

        Args:
            video_path: Path of the video file to read.

        Returns:
            float32 array of shape ``(frame_count, frame_size[0], frame_size[1], 3)`` with
            values in ``[0, 255]``.
        """
        height, width = self.frame_size
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            if total_frames < self.frame_count:
                print(f"⚠️ Cảnh báo: Video {video_path} có {total_frames} frame, ít hơn {self.frame_count}.")

            step = max(1, total_frames // self.frame_count)

            for i in range(self.frame_count):
                cap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
                ret, frame = cap.read()
                if not ret:
                    break
                # cv2.resize expects (width, height), while the batch array is laid out as
                # (height, width); passing frame_size straight through broke non-square sizes.
                frame = cv2.resize(frame, (width, height))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
        finally:
            # Release the capture handle even if decoding or resizing raised.
            cap.release()

        while len(frames) < self.frame_count:
            frames.append(frames[-1] if frames else np.zeros((height, width, 3), dtype=np.uint8))

        return np.array(frames, dtype=np.float32)

In [6]:
dataset_path = "./dataset"
video_extensions = (".mp4", ".avi")  # supported video containers
video_paths = []
labels = []

# Each class lives in its own sub-directory; label 1 = violence, 0 = non-violence.
for label, class_id in [("Violence", 1), ("NonViolence", 0)]:
    class_path = os.path.join(dataset_path, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the file list, and any
    # seeded split derived from it, identical across machines and runs.
    for file in sorted(os.listdir(class_path)):
        # Case-insensitive match so files such as "clip.MP4" are not silently skipped.
        if file.lower().endswith(video_extensions):
            video_paths.append(os.path.join(class_path, file))
            labels.append(class_id)


In [7]:
# Hold out 20% of the videos for evaluation; the fixed seed makes the split repeatable.
video_paths_train, video_paths_test, labels_train, labels_test = train_test_split(
    video_paths, labels, test_size=0.2, random_state=42
)

# Create the generators
batch_size = 4
train_generator = VideoDataGenerator(video_paths_train, labels_train, batch_size=batch_size)
# shuffle=False keeps the test batches in the same order as labels_test.
test_generator = VideoDataGenerator(video_paths_test, labels_test, batch_size=batch_size, shuffle=False)

# Build the 3D CNN: three Conv3D + MaxPooling3D stages followed by a dense classifier head.
model = Sequential([
    Conv3D(32, kernel_size=(3,3,3), activation="relu", input_shape=(16, 224, 224, 3)),
    MaxPooling3D(pool_size=(1,2,2)),  # pool size 1 on the first (frame) axis, 2 on the spatial axes
    Conv3D(64, kernel_size=(3,3,3), activation="relu"),
    MaxPooling3D(pool_size=(1,2,2)),
    Conv3D(128, kernel_size=(3,3,3), activation="relu"),
    MaxPooling3D(pool_size=(2,2,2)),
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")  # 1 = violence, 0 = non-violence
])

model.compile(optimizer=Adam(learning_rate=0.0001), loss="binary_crossentropy", metrics=["accuracy"])

# Inspect the architecture. summary() prints the table itself and returns None, so it must
# not be wrapped in print() -- doing so emitted a stray "None" after the table.
model.summary()

None


In [None]:
# Train the model on batches drawn from the generators
model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=10,
)

# Save the model
model.save(filepath="3k-224-16.keras", save_format="keras")

In [None]:
# Predict on the test set
y_pred_prob = model.predict(test_generator)
# Threshold the probabilities into 0/1 labels and flatten to 1-D so the predictions have
# the same shape as the label vector instead of relying on implicit (n, 1) -> (n,) coercion.
y_pred = (y_pred_prob > 0.5).astype(int).ravel()
# Keep only as many labels as there are predictions; this relies on test_generator
# yielding samples in labels_test order (it is built with shuffle=False).
y_test = np.array(labels_test[:len(y_pred)])

# Compute the evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# Pin the label set so the matrix is always 2x2 (rows = true 0/1, columns = predicted 0/1),
# even when only one class shows up in the labels and predictions.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)