In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import dvc

In [2]:
COLUMNS = ['audio_path', 'audio_len', 'language', 'language_family', 'gender', 'emotion']
DATA_PATH = 'D:/licencjat/data/databases'
DF_PATH = 'D:/licencjat/data/dataframes'

RAVDNESS

Filename identifiers:

Modality (01 = full-AV, 02 = video-only, 03 = audio-only).

Vocal channel (01 = speech, 02 = song).

Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).

Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.

Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").

Repetition (01 = 1st repetition, 02 = 2nd repetition).

Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

Filename example: 03-01-06-01-02-01-12.wav

Audio-only (03)
Speech (01)
Fearful (06)
Normal intensity (01)
Statement "dogs" (02)
1st Repetition (01)
12th Actor (12)
Female, as the actor ID number is even.

In [110]:
ravdness_path = f'{DATA_PATH}/ravdness'
ravdness_df = pd.DataFrame(columns=COLUMNS)
ravdness_language = 'enUS'
ravdness_language_family = 'Indo-European'

emotion_map_ravdness = {'01' : 'neutral', 
                        '02' : 'neutral', 
                        '03' : 'happy', 
                        '04' : 'sad', 
                        '05' : 'angry',
                        '06' : 'fearful',
                        '07' : 'disgusted',
                        '08' : 'surprised'}

In [None]:
for actor in os.listdir(ravdness_path):
    for audio_filename in os.listdir(f'{ravdness_path}/{actor}'):
        audio_path = f'{ravdness_path}/{actor}/{audio_filename}'
        
        audio_filename_splitted = audio_filename.split('-')
        emotion = emotion_map_ravdness[audio_filename_splitted[2]]
        audio_file, sr = librosa.load(audio_path)
        audio_len = str(f'{len(audio_file) / sr}')

        if int(audio_filename_splitted[-1][:-4]) % 2 == 0:
            gender = 'female'
        else:
            gender = 'male'
        
        df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : ravdness_language, 'language_family' : ravdness_language_family, 'gender': gender, 'emotion' : emotion})
        ravdness_df = ravdness_df.append(df_row, ignore_index=True)

In [130]:
ravdness_df.to_csv(f'{DF_PATH}/ravdness_df.csv')
ravdness_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/ravdness/Actor_01/...,3.303310657596372,enUS,Indo-European,male,neutral
1,D:/licencjat/data/databases/ravdness/Actor_01/...,3.336689342403628,enUS,Indo-European,male,neutral
2,D:/licencjat/data/databases/ravdness/Actor_01/...,3.269931972789116,enUS,Indo-European,male,neutral
3,D:/licencjat/data/databases/ravdness/Actor_01/...,3.1698412698412697,enUS,Indo-European,male,neutral
4,D:/licencjat/data/databases/ravdness/Actor_01/...,3.53687074829932,enUS,Indo-European,male,neutral


Berlin EmoDB

In [131]:
emodb_path = f'{DATA_PATH}/Berlin_EMODB/wav'
emodb_df = pd.DataFrame(columns=COLUMNS)
emodb_language = 'deDE'
emodb_language_family = 'Indo-European'

speaker_gender_map_emodb = {'03' : 'male',
                            '08' : 'female',
                            '09' : 'female',
                            '10' : 'male',
                            '11' : 'male',
                            '12' : 'male',
                            '13' : 'female',
                            '14' : 'female',
                            '15' : 'male',
                            '16' : 'female'}

emotion_map_emodb = {'W' : 'angry', 
                     'L' : 'bored', 
                     'E' : 'disgusted',
                     'A' : 'fearful', 
                     'F' : 'happy',
                     'T' : 'sad',
                     'N' : 'neutral'}

In [None]:
for audio_filename in os.listdir(emodb_path):
    audio_path = f'{emodb_path}/{audio_filename}'
    speaker = audio_filename[:2]

    audio_file, sr = librosa.load(audio_path)
    audio_len = str(f'{len(audio_file) / sr}')
    
    gender = speaker_gender_map_emodb[speaker]
    emotion = emotion_map_emodb[audio_filename[5]]
    
    df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : emodb_language, 'language_family' : emodb_language_family, 'gender': gender, 'emotion' : emotion})
    emodb_df = emodb_df.append(df_row, ignore_index=True)

In [140]:
emodb_df.to_csv(f'{DF_PATH}/berlin_emodb_df.csv')
emodb_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/Berlin_EMODB/wav/0...,1.8982766439909295,deDE,Indo-European,male,happy
1,D:/licencjat/data/databases/Berlin_EMODB/wav/0...,1.6112925170068026,deDE,Indo-European,male,neutral
2,D:/licencjat/data/databases/Berlin_EMODB/wav/0...,1.8778231292517007,deDE,Indo-European,male,angry
3,D:/licencjat/data/databases/Berlin_EMODB/wav/0...,2.0062585034013605,deDE,Indo-European,male,happy
4,D:/licencjat/data/databases/Berlin_EMODB/wav/0...,1.4398185941043085,deDE,Indo-European,male,neutral


SAVEE

In [136]:
savee_path = f'{DATA_PATH}/savee/ALL'
savee_df = pd.DataFrame(columns=COLUMNS)
savee_language = 'enGB'
savee_language_family = 'Indo-European'
gender = 'male'

emotion_map_savee = {'n' : 'neutral', 
                     'a' : 'angry', 
                     'd' : 'disgusted',
                     'f' : 'fearful', 
                     'h' : 'happy',
                     'sa' : 'sad',
                     'su' : 'surprised'}

In [None]:
for audio_filename in os.listdir(savee_path):
    audio_path = f'{savee_path}/{audio_filename}'
    audio_filename_splitted = audio_filename.split('_')
    emotion = emotion_map_savee[audio_filename_splitted[1][:-6]]
    audio_file, sr = librosa.load(audio_path)
    audio_len = str(f'{len(audio_file) / sr}')
    
    df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : savee_language, 'language_family' : savee_language_family, 'gender': gender, 'emotion' : emotion})
    savee_df = savee_df.append(df_row, ignore_index=True)

In [118]:
savee_df.to_csv(f'{DF_PATH}/savee_df.csv')
savee_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/savee/ALL/DC_a01.wav,3.6478004535147392,enGB,Indo-European,male,angry
1,D:/licencjat/data/databases/savee/ALL/DC_a02.wav,3.438140589569161,enGB,Indo-European,male,angry
2,D:/licencjat/data/databases/savee/ALL/DC_a03.wav,2.0755102040816324,enGB,Indo-European,male,angry
3,D:/licencjat/data/databases/savee/ALL/DC_a04.wav,2.7253514739229026,enGB,Indo-European,male,angry
4,D:/licencjat/data/databases/savee/ALL/DC_a05.wav,2.9979138321995467,enGB,Indo-European,male,angry


TESS

In [141]:
tess_path = f'{DATA_PATH}/TESS'
tess_df = pd.DataFrame(columns=COLUMNS)
tess_language = 'enCA'
tess_language_family = 'Indo-European'

In [None]:
for actor in os.listdir(tess_path):
    for audio_filename in os.listdir(f'{tess_path}/{actor}'):
        audio_path = f'{tess_path}/{actor}/{audio_filename}'
        audio_filename_splitted = audio_filename.split('_') 

        emotion = audio_filename_splitted[2][:-4]

        if emotion == 'fear':
            emotion = 'fearful'
        if emotion == 'ps':
            emotion = 'surprised'
        if emotion == 'disgust':
            emotion = 'disgusted'
        
        audio_file, sr = librosa.load(audio_path)
        audio_len = str(f'{len(audio_file) / sr}')

        df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : tess_language, 'language_family' : tess_language_family, 'gender': gender, 'emotion' : emotion})
        tess_df = tess_df.append(df_row, ignore_index=True)

In [143]:
tess_df.to_csv(f'{DF_PATH}/tess_df.csv')
tess_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.539047619047619,enCA,Indo-European,female,angry
1,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.6166439909297052,enCA,Indo-European,female,angry
2,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.542040816326531,enCA,Indo-European,female,angry
3,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.606893424036281,enCA,Indo-European,female,angry
4,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.3951927437641722,enCA,Indo-European,female,angry


EMOVO

In [12]:
emovo_path = f'{DATA_PATH}/emovo'
emovo_df = pd.DataFrame(columns=COLUMNS)
emovo_language = 'itIT'
emovo_language_family = 'Indo-European'

emotion_map_emovo = {'neu' : 'neutral', 
                     'rab' : 'angry', 
                     'dis' : 'disgusted',
                     'pau' : 'fearful', 
                     'gio' : 'happy',
                     'tri' : 'sad',
                     'sor' : 'surprised'}

gender_map_emovo = {'f' : 'female',
                    'm' : 'male'}

In [None]:
for actor in os.listdir(emovo_path):
    for audio_filename in os.listdir(f'{emovo_path}/{actor}'):
        audio_path = f'{emovo_path}/{actor}/{audio_filename}'
        audio_filename_splitted = audio_filename.split('-')

        gender = gender_map_emovo[audio_filename_splitted[1][0]]
        emotion = emotion_map_emovo[audio_filename_splitted[0]]

        audio_file, sr = librosa.load(audio_path)
        audio_len = str(f'{len(audio_file) / sr}')

        df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : emovo_language, 'language_family' : emovo_language_family, 'gender': gender, 'emotion' : emotion})
        emovo_df = emovo_df.append(df_row, ignore_index=True)

In [None]:
emovo_df.to_csv(f'{DF_PATH}/emovo_df.csv')
emovo_df.head()

AESDD

In [4]:
aesdd_path = f'{DATA_PATH}/AESDD'
aesdd_df = pd.DataFrame(columns=COLUMNS)
aesdd_language = 'grGR'
aesdd_language_family = 'Indo-European'

emotion_map_aesdd = {'a' : 'angry', 
                     'd' : 'disgusted', 
                     'f' : 'fearful',
                     'h' : 'happy', 
                     's' : 'sad'}

gender_map_aesdd = {'1' : 'female',
                    '2' : 'female',
                    '3' : 'male',
                    '4' : 'male',
                    '5' : 'female',
                    '6' : 'male'}

In [None]:
for subdir in os.listdir(aesdd_path):
    for audio_filename in os.listdir(f'{aesdd_path}/{subdir}'):
        audio_path = f'{aesdd_path}/{subdir}/{audio_filename}'
        audio_filename_splitted = audio_filename.split()

        gender = gender_map_aesdd[audio_filename_splitted[1].strip('(0)b.wav')]
        emotion = emotion_map_aesdd[audio_filename_splitted[0][0]]

        audio_file, sr = librosa.load(audio_path)
        audio_len = str(f'{len(audio_file) / sr}')

        df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : aesdd_language, 'language_family' : aesdd_language_family, 'gender': gender, 'emotion' : emotion})
        aesdd_df = aesdd_df.append(df_row, ignore_index=True)

In [15]:
aesdd_df.to_csv(f'{DF_PATH}/aesdd_df.csv')
aesdd_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/AESDD/anger/a01 (1...,4.149115646258504,grGR,Indo-European,female,angry
1,D:/licencjat/data/databases/AESDD/anger/a01 (1...,4.149115646258504,grGR,Indo-European,female,angry
2,D:/licencjat/data/databases/AESDD/anger/a01 (1...,4.149115646258504,grGR,Indo-European,female,angry
3,D:/licencjat/data/databases/AESDD/anger/a01 (2...,3.477868480725624,grGR,Indo-European,female,angry
4,D:/licencjat/data/databases/AESDD/anger/a01 (3...,4.0,grGR,Indo-European,male,angry


MESD

In [15]:
mesd_path = f'{DATA_PATH}/MESD'
mesd_df = pd.DataFrame(columns=COLUMNS)
mesd_language = 'esMX'
mesd_language_family = 'Indo-European'

emotion_map_mesd = {'Anger' : 'angry', 
                    'Disgust' : 'disgusted', 
                    'Fear' : 'fearful',
                    'Happiness' : 'happy', 
                    'Sadness' : 'sad',
                    'Neutral' : 'neutral'}

gender_map_mesd = {'M' : 'male',
                    'F' : 'female'}

In [None]:
for audio_filename in os.listdir(mesd_path):
    audio_path = f'{mesd_path}/{audio_filename}'

    audio_filename_splitted = audio_filename.split('_')
    gender = audio_filename_splitted[1]

    if gender == 'M' or gender == 'F':
        gender = gender_map_mesd[gender]
        emotion = emotion_map_mesd[audio_filename_splitted[0]]

        audio_file, sr = librosa.load(audio_path)
        audio_len = str(f'{len(audio_file) / sr}')
        
        df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : mesd_language, 'language_family' : mesd_language_family, 'gender': gender, 'emotion' : emotion})
        mesd_df = mesd_df.append(df_row, ignore_index=True)

In [20]:
mesd_df.to_csv(f'{DF_PATH}/mesd_df.csv')
mesd_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/MESD/Anger_F_A_aba...,0.6680725623582766,esMX,Indo-European,female,angry
1,D:/licencjat/data/databases/MESD/Anger_F_A_adi...,0.4980952380952381,esMX,Indo-European,female,angry
2,D:/licencjat/data/databases/MESD/Anger_F_A_ant...,0.8148299319727891,esMX,Indo-European,female,angry
3,D:/licencjat/data/databases/MESD/Anger_F_A_arr...,0.766984126984127,esMX,Indo-European,female,angry
4,D:/licencjat/data/databases/MESD/Anger_F_A_aye...,0.7198639455782313,esMX,Indo-European,female,angry


CaFE

In [22]:
cafe_path = f'{DATA_PATH}/CaFE'
cafe_df = pd.DataFrame(columns=COLUMNS)
cafe_language = 'frCA'
cafe_language_family = 'Indo-European'

emotion_map_cafe = {'C' : 'angry', 
                    'D' : 'disgusted', 
                    'J' : 'happy',
                    'N' : 'neutral', 
                    'P' : 'fearful',
                    'S' : 'surprised',
                    'T' : 'sad'}

In [None]:
for e in os.listdir(cafe_path):
    for intensity in os.listdir(f'{cafe_path}/{e}'):
        for audio_filename in os.listdir(f'{cafe_path}/{e}/{intensity}'):
            audio_path = f'{cafe_path}/{e}/{intensity}/{audio_filename}'
            audio_filename_splitted = audio_filename.split('-')

            if int(audio_filename_splitted[0]) % 2 == 0:
                gender = 'female'
            else:
                gender = 'male'

            emotion = emotion_map_cafe[audio_filename_splitted[1]]

            audio_file, sr = librosa.load(audio_path)
            audio_len = str(f'{len(audio_file) / sr}')

            df_row = pd.Series({'audio_path' : audio_path, 'audio_len' : audio_len, 'language' : cafe_language, 'language_family' : cafe_language_family, 'gender': gender, 'emotion' : emotion})
            cafe_df = cafe_df.append(df_row, ignore_index=True)

In [33]:
cafe_df.to_csv(f'{DF_PATH}/cafe_df.csv')
cafe_df.head()

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/CaFE/ColŐre/Faible...,0.755328798185941,frCA,Indo-European,male,angry
1,D:/licencjat/data/databases/CaFE/angry/Faible/...,0.755328798185941,frCA,Indo-European,male,angry
2,D:/licencjat/data/databases/CaFE/angry/Faible/...,0.755328798185941,frCA,Indo-European,male,angry
3,D:/licencjat/data/databases/CaFE/angry/Faible/...,0.755328798185941,frCA,Indo-European,male,angry
4,D:/licencjat/data/databases/CaFE/angry/Faible/...,0.755328798185941,frCA,Indo-European,male,angry


In [144]:
df = pd.DataFrame(columns=COLUMNS)

df = df.append(ravdness_df, ignore_index=True)
df = df.append(emodb_df, ignore_index=True)
df = df.append(savee_df, ignore_index=True)
df = df.append(tess_df, ignore_index=True)
df = df.append(emovo_df, ignore_index=True)

  df = df.append(ravdness_df, ignore_index=True)
  df = df.append(emodb_df, ignore_index=True)
  df = df.append(savee_df, ignore_index=True)
  df = df.append(tess_df, ignore_index=True)
  df = df.append(emovo_df, ignore_index=True)


In [145]:
df = df.sample(frac=1, ignore_index=True)

In [127]:
df.head(50)

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/TESS/OAF_disgust/O...,2.2741496598639457,enCA,Indo-European,male,disgusted
1,D:/licencjat/data/databases/TESS/OAF_Pleasant_...,1.9848979591836733,enCA,Indo-European,male,surprised
2,D:/licencjat/data/databases/ravdness/Actor_11/...,3.5035374149659866,enUS,Indo-European,male,happy
3,D:/licencjat/data/databases/TESS/YAF_fear/YAF_...,1.820907029478458,enCA,Indo-European,male,fearful
4,D:/licencjat/data/databases/TESS/OAF_Pleasant_...,1.9914285714285715,enCA,Indo-European,male,surprised
5,D:/licencjat/data/databases/TESS/YAF_happy/YAF...,1.8750113378684807,enCA,Indo-European,male,happy
6,D:/licencjat/data/databases/TESS/YAF_neutral/Y...,2.0259410430839004,enCA,Indo-European,male,neutral
7,D:/licencjat/data/databases/ravdness/Actor_20/...,3.6369614512471657,enUS,Indo-European,female,neutral
8,D:/licencjat/data/databases/ravdness/Actor_05/...,4.304308390022676,enUS,Indo-European,male,neutral
9,D:/licencjat/data/databases/TESS/OAF_Fear/OAF_...,1.5410430839002267,enCA,Indo-European,male,fearful


In [146]:
df.groupby('emotion').count()['audio_path']

emotion
angry        1182
bored         162
disgusted    1020
fearful      1066
happy        1070
neutral      1338
sad          1052
surprised     928
Name: audio_path, dtype: int64

In [155]:
df = df.loc[df['emotion'].isin(['angry', 'fearful', 'happy', 'neutral', 'sad'])]
df.head(50)

Unnamed: 0,audio_path,audio_len,language,language_family,gender,emotion
0,D:/licencjat/data/databases/TESS/OAF_Fear/OAF_...,1.4480725623582766,enCA,Indo-European,female,fearful
2,D:/licencjat/data/databases/ravdness/Actor_18/...,4.237596371882086,enUS,Indo-European,female,angry
5,D:/licencjat/data/databases/ravdness/Actor_15/...,3.703718820861678,enUS,Indo-European,male,angry
7,D:/licencjat/data/databases/TESS/OAF_neutral/O...,1.8376417233560087,enCA,Indo-European,female,neutral
8,D:/licencjat/data/databases/TESS/YAF_angry/YAF...,1.956281179138322,enCA,Indo-European,female,angry
9,D:/licencjat/data/databases/TESS/OAF_happy/OAF...,1.7905668934240362,enCA,Indo-European,female,happy
10,D:/licencjat/data/databases/TESS/OAF_angry/OAF...,1.3951927437641722,enCA,Indo-European,female,angry
11,D:/licencjat/data/databases/ravdness/Actor_21/...,4.237596371882086,enUS,Indo-European,male,angry
13,D:/licencjat/data/databases/ravdness/Actor_21/...,4.13750566893424,enUS,Indo-European,male,angry
14,D:/licencjat/data/databases/Berlin_EMODB/wav/1...,1.7219501133786848,deDE,Indo-European,male,neutral


In [156]:
df.groupby('emotion').count()['audio_path'] 

emotion
angry      1182
fearful    1066
happy      1070
neutral    1338
sad        1052
Name: audio_path, dtype: int64

In [157]:
df.groupby('language').count()['audio_path'] 

language
deDE     816
enCA    2000
enGB     360
enUS    2112
itIT     420
Name: audio_path, dtype: int64

In [158]:
df.groupby('gender').count()['audio_path'] 

gender
female    3708
male      2000
Name: audio_path, dtype: int64