In [5]:
import pandas as pd
import numpy as np
import os
import sys
import glob 
import IPython.display as ipd
import random
import matplotlib.pyplot as plt
import plotly.offline as py
import seaborn as sns
import scipy.stats
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from sklearn.model_selection import StratifiedShuffleSplit
import librosa
import librosa.display
from tqdm import tqdm
import warnings
# Silence every Python warning for the rest of the session. This runs first so
# the filter is already installed when the two calls below execute.
warnings.filterwarnings('ignore')
# Do not truncate the column list when displaying wide DataFrames.
pd.set_option('display.max_columns', None)
# Initialise plotly's offline notebook mode.
py.init_notebook_mode(connected=True)

In [6]:
## Loading Data
def metadata(basepath):
    """Build a metadata table for the Ravdess audio files found in ``basepath``.

    A directory entry is parsed only when its name (the part before the first
    ``.``) splits on ``-`` into exactly seven numeric fields; every other entry
    is skipped and contributes no row.

    Parameters
    ----------
    basepath : str
        Directory that directly contains the audio files. A trailing path
        separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per parsed file, in sorted file-name order, with the columns
        ``path``, ``source``, ``actor``, ``gender``, ``intensity``,
        ``statement``, ``repetition``, ``emotion`` and ``label``. ``label`` is
        the gender followed by the emotion name, e.g. ``"female_angry"``;
        emotion codes outside 1-8 map to ``"_none"``.
    """
    columns = ['path', 'source', 'actor', 'gender', 'intensity',
               'statement', 'repetition', 'emotion', 'label']
    emotion_names = {
        1: "_neutral",
        2: "_calm",
        3: "_happy",
        4: "_sad",
        5: "_angry",
        6: "_fearful",
        7: "_disgust",
        8: "_surprised",
    }

    records = []
    # Sort the listing so the row order (and any seeded split computed from it)
    # does not depend on the arbitrary order os.listdir returns.
    for f in sorted(os.listdir(basepath)):
        fields = f.split('.')[0].split('-')
        # Skip anything that does not follow the seven-field numeric naming
        # scheme instead of appending a stale or undefined row for it.
        if len(fields) != 7 or not all(part.isdecimal() for part in fields):
            continue

        src = int(fields[1])
        emotion = int(fields[2])
        actor = int(fields[-1])
        # Even actor ids are labelled female, odd ids male.
        gender = "female" if actor % 2 == 0 else "male"
        # Fields 3-5 are reduced to binary flags: '01' -> 0, anything else -> 1.
        intensity = 0 if fields[3] == '01' else 1
        statement = 0 if fields[4] == '01' else 1
        repeat = 0 if fields[5] == '01' else 1

        records.append([
            os.path.join(basepath, f),
            src,
            actor,
            gender,
            intensity,
            statement,
            repeat,
            emotion,
            gender + emotion_names.get(emotion, "_none"),
        ])

    # Build the frame once rather than growing it row by row.
    return pd.DataFrame(records, columns=columns)

In [7]:
## Parse the audio file names in the raw-data directory into the metadata table
audio_dir = "Y:/Masters_Content/Deep_Learning/Project/Data/Files/"
df = metadata(audio_dir)

In [8]:
## Stratified train/test split on the label column; the test frame holds 0.2 of the data.
## The splitter is configured for two shuffles and only the last one is kept.
sss = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=11)
train_index, test_index = list(sss.split(df, df.label))[-1]
df_train = df.iloc[train_index, :]
df_test = df.iloc[test_index, :]

In [9]:
## Use each file's path as the row index and remove it from the columns
df_train = df_train.set_index("path")
df_test = df_test.set_index("path")

In [10]:
## The Spectrograms class takes the metadata DataFrame created in the previous step, along with the output path and the dataset type (train, validation, or test).
## Users have the option to specify which kind of spectrogram they want.
## The class is meant to generate 3 types of spectrograms: Mel scale, MFCC, and spectral (only the Mel-scale spectrogram is implemented below).
## If sample is set to True, the class just displays the requested spectrogram of the first file in the dataset.

class Spectrograms():
    """Render log-scaled Mel spectrogram images for the audio files in a metadata frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata frame whose index holds the audio file paths and whose last
        column holds the label used as the output sub-directory name.
    datasettype : str
        Name of the dataset split (e.g. ``"train"``); used as a directory level
        under ``outputpath``.
    outputpath : str
        Root output directory. It is concatenated directly with
        ``datasettype``, so it must end with a path separator.
    sample : bool, default False
        When True, only the first file is processed and its spectrogram is
        shown instead of saved.
    augmentation : bool, default False
        Stored on the instance; not used by the methods of this class.
    mel : bool, default True
        Stored on the instance; not used by the methods of this class.
    mfcc : bool, default False
        Stored on the instance; not used by the methods of this class.
    n_mels : int, default 128
        Number of Mel bands passed to ``librosa.feature.melspectrogram``.
    """

    def __init__(self, df, datasettype, outputpath, sample=False, augmentation=False, mel=True, mfcc=False, n_mels=128):
        self.df = df
        self.augmentation = augmentation
        self.mel = mel
        self.mfcc = mfcc
        # generate() reads this attribute, so it must always be set here.
        self.n_mels = n_mels
        self.outputpath = outputpath
        self.datasettype = datasettype
        self.sample = sample

    def get_spectrograms(self):
        """Show the first file's spectrogram (sample mode) or save one image per file.

        In save mode the images are written to
        ``<outputpath><datasettype>/<label>/``, creating directories as needed.
        """
        if self.sample:
            x, sample_rate = librosa.load(self.df.index[0])
            self.generate(x, sample_rate, '', 0)
            return

        for file in tqdm(range(self.df.shape[0])):
            # Read the label from this instance's own frame, not from whatever
            # the notebook-global `df` currently points to.
            emotion = self.df.iloc[file, -1]
            path = self.outputpath+self.datasettype+"/"+emotion+"/"
            os.makedirs(path, exist_ok=True)

            ## Reading signal from .wav file
            x, sample_rate = librosa.load(self.df.index[file])
            self.generate(x, sample_rate, path, file)

    def generate(self, x, sample_rate, path, count):
        """Draw the log-Mel spectrogram of one signal and show or save it.

        Parameters
        ----------
        x : array-like
            Audio samples as returned by ``librosa.load``.
        sample_rate : int or float
            Sampling rate returned together with ``x``.
        path : str
            Directory prefix (with trailing separator) for the saved image;
            ignored in sample mode.
        count : int
            Number appended to the file name: ``melspectrogram_<count>.jpg``.
        """
        # The audio is passed by keyword because current librosa releases no
        # longer accept it positionally.
        mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
        log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

        # A single axes that fills the whole figure, with ticks and frame hidden,
        # so the image contains only the spectrogram.
        fig = plt.figure(figsize=(12,4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(log_mel_features, sr=sample_rate, x_axis='time', y_axis='mel', ax=ax)
        if self.sample:
            plt.show()
        else:
            fig.savefig(path+"melspectrogram_"+str(count)+".jpg")
            # Close explicitly so figures do not pile up across the loop.
            plt.close(fig)

In [11]:
## Settings for the training-set spectrogram run
datasettype = "train"
outputpath = "Y:/Masters_Content/Deep_Learning/Project/Data/Spectrograms/"
mel = True
augmentation = False
sample = False
# NOTE: this rebinds `df` from the full metadata table to the training split,
# so every later cell that reads the global `df` sees only the training rows.
df = df_train

In [12]:
## Render and save a Mel spectrogram for every file in the training split (sample=False)
spectrograms = Spectrograms(
    df=df_train,
    datasettype='train',
    outputpath='Y:/Masters_Content/Deep_Learning/Project/Data/Spectrograms/',
    sample=False,
)
spectrograms.get_spectrograms()

100%|██████████████████████████████████████████████████████████████████████████████| 1152/1152 [02:08<00:00,  8.98it/s]


In [13]:
## Settings for the test-set spectrogram run
datasettype = "test"
outputpath = "Y:/Masters_Content/Deep_Learning/Project/Data/Spectrograms/"

In [15]:
# Count how many entries each label directory of the training spectrograms contains
train_root = "Y:/Masters_Content/Deep_Learning/Project/Data/Spectrograms/train"
train_dct = {}
for label_dir in os.listdir(train_root):
    entries = os.listdir(train_root + "/" + str(label_dir))
    train_dct[label_dir] = len(entries)