In [None]:
pip install Kaggle

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [7]:
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

In [8]:
! chmod 600 ~/.kaggle/kaggle.json

In [9]:
! kaggle datasets download mansourehk/shemo-persian-speech-emotion-detection-database

Dataset URL: https://www.kaggle.com/datasets/mansourehk/shemo-persian-speech-emotion-detection-database
License(s): unknown
Downloading shemo-persian-speech-emotion-detection-database.zip to /content
100% 828M/829M [00:35<00:00, 25.3MB/s]
100% 829M/829M [00:35<00:00, 24.5MB/s]


In [10]:
!mkdir persian-speech-emotion-detection-database

mkdir: cannot create directory ‘persian-speech-emotion-detection-database’: File exists


In [11]:
! unzip /content/shemo-persian-speech-emotion-detection-database.zip -d persian-speech-emotion-detection-database

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A22.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A23.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A24.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A25.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A26.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A27.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A28.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A29.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A30.tra  
  inflating: persian-speech-emotion-detection-database/transcript/final script/F24A31.tra  
  inflating: pe

In [None]:
pip install resampy


In [12]:
import librosa
import  IPython.display as ipd
import matplotlib.pyplot as plt

In [24]:
addr_voice = '/content/persian-speech-emotion-detection-database/female/F01S23.wav'

In [25]:
ipd.Audio(addr_voice)

In [None]:
import os
import shutil
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.utils import to_categorical

source_male = "persian-speech-emotion-detection-database/male"
source_female = "persian-speech-emotion-detection-database/female"
destination = "persian-speech-emotion-detection-database/train"

os.makedirs(destination, exist_ok=True)

for filename in os.listdir(source_male):
    if filename.endswith(".wav"):
        shutil.copy(os.path.join(source_male, filename), os.path.join(destination, filename))

for filename in os.listdir(source_female):
    if filename.endswith(".wav"):
        shutil.copy(os.path.join(source_female, filename), os.path.join(destination, filename))

print("Files copied successfully!")

# Function to extract and pad MFCC features
def extract_and_pad_mfcc(audio_path, max_len=200, n_mfcc=20):
    try:
        y, sr = librosa.load(audio_path)
    except Exception as e:
        raise ValueError(f"Error loading audio file: {audio_path} - {e}")

    mfcc_features = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T

    if len(mfcc_features) > max_len:
        mfcc_features = mfcc_features[:max_len]
    else:
        mfcc_features = np.pad(mfcc_features, ((0, max_len - len(mfcc_features)), (0, 0)), mode='constant', constant_values=0)

    return mfcc_features

# Extract and pad MFCC features for all audio files in the train folder
mfcc_data = []
labels = []
for filename in os.listdir("persian-speech-emotion-detection-database/train"):
    if filename.endswith(".wav"):
        audio_path = os.path.join("persian-speech-emotion-detection-database/train", filename)
        try:
            mfcc_features = extract_and_pad_mfcc(audio_path)
            mfcc_data.append(mfcc_features)
            labels.append(filename[1])  # Assuming the label is the second character of the file name as per your document
        except ValueError as e:
            print(f"Error processing file {filename}: {e}")

mfcc_data = np.array(mfcc_data)
labels = np.array(labels)

# Check unique labels
unique_labels = np.unique(labels)
print(f"Unique labels found: {unique_labels}")

# Normalize features
scaler = StandardScaler()
mfcc_data = mfcc_data.reshape(-1, mfcc_data.shape[-1])  # Reshape to 2D for scaling
mfcc_data = scaler.fit_transform(mfcc_data)
mfcc_data = mfcc_data.reshape(-1, 200, 20)  # Reshape back to 3D

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
print(f"Encoded labels: {labels_encoded}")

# Verify the number of classes
num_classes = len(np.unique(labels_encoded))
print(f"Number of classes: {num_classes}")

labels_categorical = to_categorical(labels_encoded, num_classes=num_classes)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(mfcc_data, labels_categorical, test_size=0.2, random_state=42)

# Define the LSTM model
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(200, 20)))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(32))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))  # Use num_classes determined from the labels

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Save the model
model.save('emotion_classification_model.h5')

print("Model trained and saved successfully!")

In [83]:
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the trained model
model = load_model('emotion_classification_model.h5')

# Make predictions on the test set
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate precision, recall, and F1-score
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Accuracy: 0.6700
Precision: 0.6689
Recall: 0.6700
F1-score: 0.6670
