In [3]:
import librosa
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers,models
from joblib import dump

In [None]:
def ravdess_Xy():
    emotion_map = {
        '01': 'neutral',
        '02': 'calm',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fear',
        '07': 'disgust',
        '08': 'surprised'
    }

    ravdess_path = 'ses/ses_dosyalarim_ravdes'

    
    X = []  
    y = []  

    for filename in os.listdir(ravdess_path):
        if filename.endswith('.wav'):
            file_path = os.path.join(ravdess_path, filename)
            
            emotion_code = filename.split('-')[2]
            emotion_label = emotion_map.get(emotion_code)

            # ses dosyasını oku
            y_audio, sr = librosa.load(file_path, sr=16000)
            y_audio, _ = librosa.effects.trim(y_audio)
            y_audio = librosa.util.normalize(y_audio)

            # mfcc çıkar 
            mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=20
            
            
            )

            # zaaman boyutunu 100'e sabitle 
            if mfcc.shape[1] >= 100:
                mfcc = mfcc[:, :100]
            else:
                pad_width = 100 - mfcc.shape[1]
                mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')

            # listeye ekle
            X.append(mfcc)
            y.append(emotion_label)

    # numpy dizisine çevir
    X = np.array(X)
    y = np.array(y)

    
    return X,y


In [5]:
def tess_Xy():
    tess_path = 'ses/ses_dosyalarim_tess/TESS Toronto emotional speech set data'

    X1 = []
    y1 = []
    
    for filename in os.listdir(tess_path):
        folder_path = os.path.join(tess_path,filename)
        
        for file in os.listdir(folder_path):
            if file.endswith('.wav'):
                file_path = os.path.join(folder_path,file)
                emotion = file.split('_')[2].split('.')[0]

            
            
                y_audio, sr = librosa.load(file_path,sr=16000)
                y_audio, _ = librosa.effects.trim(y_audio)
                y_audio = librosa.util.normalize(y_audio)

                mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=20)
                if mfcc.shape[1] >= 100:
                    mfcc = mfcc[:, :100]
                else:
                    pad_width = 100 - mfcc.shape[1]
                    mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')

                X1.append(mfcc)
                #ps = pleasent surprised
                if(emotion=='ps'):
                    y1.append('surprised')
                else:   
                    y1.append(emotion)

    X1 = np.array(X1)
    y1 = np.array(y1)
    print(X1.shape)    
    
    return X1,y1


In [6]:


X,y = ravdess_Xy()
X1,y1 = tess_Xy()


(2800, 20, 100)


In [7]:
#datasetleri combine etme
combined = np.concatenate([X,X1], axis=0)
combined_y = np.concatenate([y,y1],axis=0)

In [8]:
from sklearn.calibration import LabelEncoder
label_encoder1 = LabelEncoder()
label_encoder2 = LabelEncoder()
label_encoder3 = LabelEncoder()

#etiketleri sayısal değerler çevirme

y_encoded = label_encoder1.fit_transform(combined_y)
y1_encoded = label_encoder2.fit_transform(y)
y2_encoded = label_encoder3.fit_transform(y1)

dump(label_encoder1, "label_encoder.joblib")

['label_encoder.joblib']

In [9]:
#cnn girdisi için her datasete kanal boyutu ekleme
combined= combined[..., np.newaxis]
X = X[...,np.newaxis]
X1 = X1[...,np.newaxis] 
print(combined.shape)

(4240, 20, 100, 1)


In [10]:
#her dataset için train ve test modeli ayarlama
from sklearn.model_selection import train_test_split
X1_train, X1_test , y1_train , y1_test = train_test_split(X,y1_encoded,test_size=0.2,random_state=42)
X2_train, X2_test , y2_train , y2_test = train_test_split(X1,y2_encoded,test_size=0.2,random_state=42)
X_train, X_test, y_train, y_test = train_test_split(combined, y_encoded, test_size=0.2, random_state=42)
print(X_train.shape)

(3392, 20, 100, 1)


In [None]:
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Reshape

def build_model(input_shape):

    model = models.Sequential()
    # 1. cnn katmanı
    model.add(layers.Conv2D(32, (5, 5), activation='relu', input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.3))

    # 2. cnn katmanı
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.3))

    
    #lstm girdisi için boyut ayarlama (cnn çıktısına göre ayarlanmıştır)
    model.add(Reshape((23,192)))
    # lstm katmanı
    model.add(layers.LSTM(128))
    model.add(layers.Dropout(0.3))

    model.summary()

    # tam bağlantılı katman(sınıflandıran katman)
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(len(np.unique(y_encoded)), activation='softmax'))  # sınıf sayısı
    model.summary()

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [12]:
print(X_train.shape)
print(X1_train.shape)
print(X2_train.shape)

model1 = build_model(X_train.shape[1:])
model2 = build_model(X1_train.shape[1:])
model3 = build_model(X2_train.shape[1:])






(3392, 20, 100, 1)
(1152, 20, 100, 1)
(2240, 20, 100, 1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model1.fit(X_train, y_train, epochs=30, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 146ms/step - accuracy: 0.2155 - loss: 1.9571 - val_accuracy: 0.2594 - val_loss: 2.0726
Epoch 2/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 125ms/step - accuracy: 0.5280 - loss: 1.2795 - val_accuracy: 0.5495 - val_loss: 1.2749
Epoch 3/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 116ms/step - accuracy: 0.6642 - loss: 0.9326 - val_accuracy: 0.7017 - val_loss: 0.8546
Epoch 4/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 124ms/step - accuracy: 0.7138 - loss: 0.7785 - val_accuracy: 0.7217 - val_loss: 0.7543
Epoch 5/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 119ms/step - accuracy: 0.7532 - loss: 0.6694 - val_accuracy: 0.7370 - val_loss: 0.7032
Epoch 6/30
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 116ms/step - accuracy: 0.7750 - loss: 0.6116 - val_accuracy: 0.7264 - val_loss: 0.6596
Epoch 7/30
[1m53/53[0m [

<keras.src.callbacks.history.History at 0x29fea002440>

In [14]:
model2.fit(X1_train,y1_train,epochs=30, batch_size=64, validation_data=(X1_test, y1_test))

Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 139ms/step - accuracy: 0.1822 - loss: 2.0779 - val_accuracy: 0.2118 - val_loss: 2.0513
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 111ms/step - accuracy: 0.2396 - loss: 1.9517 - val_accuracy: 0.2083 - val_loss: 2.0843
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 110ms/step - accuracy: 0.2664 - loss: 1.8738 - val_accuracy: 0.1910 - val_loss: 2.1354
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 117ms/step - accuracy: 0.3122 - loss: 1.8015 - val_accuracy: 0.2049 - val_loss: 2.1466
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 118ms/step - accuracy: 0.3299 - loss: 1.7285 - val_accuracy: 0.2049 - val_loss: 2.1120
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 107ms/step - accuracy: 0.3655 - loss: 1.7095 - val_accuracy: 0.2326 - val_loss: 2.0272
Epoch 7/30
[1m18/18[0m [3

<keras.src.callbacks.history.History at 0x29fec353790>

In [15]:
model3.fit(X2_train,y2_train ,epochs=30, batch_size=64, validation_data=(X2_test, y2_test))

Epoch 1/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 134ms/step - accuracy: 0.2426 - loss: 1.9077 - val_accuracy: 0.2571 - val_loss: 2.0913
Epoch 2/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 117ms/step - accuracy: 0.6661 - loss: 0.8735 - val_accuracy: 0.3625 - val_loss: 1.8208
Epoch 3/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 116ms/step - accuracy: 0.8772 - loss: 0.3583 - val_accuracy: 0.7643 - val_loss: 0.6211
Epoch 4/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 118ms/step - accuracy: 0.9107 - loss: 0.2558 - val_accuracy: 0.9054 - val_loss: 0.2695
Epoch 5/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 116ms/step - accuracy: 0.9544 - loss: 0.1445 - val_accuracy: 0.9732 - val_loss: 0.0845
Epoch 6/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 119ms/step - accuracy: 0.9682 - loss: 0.1061 - val_accuracy: 0.9804 - val_loss: 0.0716
Epoch 7/30
[1m35/35[0m [3

<keras.src.callbacks.history.History at 0x29fea001cf0>

In [16]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

y_pred = model1.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)


print("doğruluk oranı:", accuracy_score(y_test, y_pred_classes))
print("unique in y_test:", np.unique(y_test))
print("unnique in y_pred:", np.unique(y_pred_classes))
print(classification_report(y_test, y_pred_classes, target_names=label_encoder1.classes_))


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
doğruluk oranı: 0.8455188679245284
unique in y_test: [0 1 2 3 4 5 6 7]
unnique in y_pred: [0 1 2 3 4 5 6 7]
              precision    recall  f1-score   support

       angry       0.94      0.85      0.89       113
        calm       0.66      0.86      0.75        44
     disgust       0.94      0.82      0.88       117
        fear       0.81      0.92      0.86       112
       happy       0.85      0.67      0.75       107
     neutral       0.81      0.91      0.86       115
         sad       0.79      0.83      0.81       118
   surprised       0.91      0.89      0.90       122

    accuracy                           0.85       848
   macro avg       0.84      0.85      0.84       848
weighted avg       0.85      0.85      0.85       848



In [None]:
y_pred = model2.predict(X1_test)
y_pred_classes = np.argmax(y_pred, axis=1)


print("doğruluk oranı:", accuracy_score(y1_test, y_pred_classes))
print(classification_report(y1_test, y_pred_classes, target_names=label_encoder2.classes_))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Doğruluk Oranı: 0.6458333333333334
              precision    recall  f1-score   support

       angry       0.71      0.78      0.74        37
        calm       0.67      0.91      0.77        35
     disgust       0.69      0.69      0.69        35
        fear       0.87      0.47      0.61        43
       happy       0.58      0.41      0.48        37
     neutral       0.36      0.74      0.48        19
         sad       0.56      0.64      0.60        44
   surprised       0.92      0.63      0.75        38

    accuracy                           0.65       288
   macro avg       0.67      0.66      0.64       288
weighted avg       0.69      0.65      0.65       288



In [None]:
y_pred = model3.predict(X2_test)
y_pred_classes = np.argmax(y_pred, axis=1)


print("doğruluk oranı:", accuracy_score(y2_test, y_pred_classes))
print(classification_report(y2_test, y_pred_classes, target_names=label_encoder3.classes_))

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Doğruluk Oranı: 0.9910714285714286
              precision    recall  f1-score   support

       angry       1.00      1.00      1.00        74
     disgust       0.97      0.99      0.98        78
        fear       1.00      1.00      1.00        89
       happy       1.00      0.99      0.99        77
     neutral       1.00      0.99      0.99        73
         sad       0.98      1.00      0.99        82
   surprised       0.99      0.98      0.98        87

    accuracy                           0.99       560
   macro avg       0.99      0.99      0.99       560
weighted avg       0.99      0.99      0.99       560

