In [1]:
import os
import random
import shutil

# Đường dẫn tới thư mục gốc chứa dữ liệu
original_dataset_dir = 'D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/images'

# Đường dẫn tới các thư mục mới
base_dir = 'D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/split_data'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Tạo các thư mục nếu chưa tồn tại
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Lấy danh sách các lớp từ thư mục gốc
classes = [d for d in os.listdir(original_dataset_dir) if os.path.isdir(os.path.join(original_dataset_dir, d))]

for class_name in classes:
    class_dir = os.path.join(original_dataset_dir, class_name)
    images = os.listdir(class_dir)
    random.shuffle(images)
    
    train_images = images[:int(0.8 * len(images))]
    test_images = images[int(0.8 * len(images)):]
    
    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)
    
    for image in train_images:
        src = os.path.join(class_dir, image)
        dst = os.path.join(train_dir, class_name, image)
        shutil.copyfile(src, dst)
    
    for image in test_images:
        src = os.path.join(class_dir, image)
        dst = os.path.join(test_dir, class_name, image)
        shutil.copyfile(src, dst)

print('Dữ liệu đã được chia thành công!')


Dữ liệu đã được chia thành công!


In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from collections import Counter

# Xây dựng mô hình CNN cải tiến
def create_model(input_shape, num_classes):
    model = Sequential([
        Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Tạo Data Generator mà không sử dụng validation_split
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    'D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/split_data/train',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    'D:/data_analysis/speech_emotion_recognition/data/EnglishDataset/split_data/test',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'
)

# Lấy số lớp từ train_generator
num_classes = len(train_generator.class_indices)

# Tạo và huấn luyện mô hình
input_shape = (128, 128, 3)
model = create_model(input_shape, num_classes)

steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = validation_generator.samples // validation_generator.batch_size

print(f'Total training samples: {train_generator.samples}')
print(f'Total validation samples: {validation_generator.samples}')

# Đếm số lượng mẫu cho từng nhãn
train_labels_count = Counter(train_generator.labels)
validation_labels_count = Counter(validation_generator.labels)

print("Training samples per label:")
for label, count in train_labels_count.items():
    label_name = list(train_generator.class_indices.keys())[list(train_generator.class_indices.values()).index(label)]
    print(f"{label_name}: {count}")

print("Validation samples per label:")
for label, count in validation_labels_count.items():
    label_name = list(validation_generator.class_indices.keys())[list(validation_generator.class_indices.values()).index(label)]
    print(f"{label_name}: {count}")

# Huấn luyện mô hình với early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=20,
    callbacks=[early_stopping]
)

# Lưu mô hình đã huấn luyện
model.save('emotion_recognition_model.h5')


Found 8800 images belonging to 6 classes.
Found 2203 images belonging to 6 classes.
Total training samples: 8800
Total validation samples: 2203
Training samples per label:
Angry: 1502
Disgusted: 1502
Fearful: 1502
Happy: 1502
Neutral: 1290
Sad: 1502
Validation samples per label:
Angry: 376
Disgusted: 376
Fearful: 376
Happy: 376
Neutral: 323
Sad: 376
Epoch 1/20


  self._warn_if_super_not_called()


[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 686ms/step - accuracy: 0.2603 - loss: 7.1443 - val_accuracy: 0.1723 - val_loss: 10.8694
Epoch 2/20


  self.gen.throw(typ, value, traceback)


[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.2593 - val_loss: 9.4059
Epoch 3/20
[1m222/275[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m19s[0m 364ms/step - accuracy: 0.3941 - loss: 5.2867

: 