In [1]:
#imports
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import librosa
import librosa.display
import IPython.display as ipd

import tensorflow as tf

#to make plots look nice
from itertools import cycle
# Apply seaborn's "white" style to all subsequent figures.
sns.set_theme(style="white", palette=None)

# Colours of the active matplotlib property cycle: once as a plain list,
# once as an endless iterator over that same list.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(color_pal)

2022-11-22 15:57:25.929443: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-22 15:57:25.929525: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# - Preparing data

## retrieving audio

- The path below points to my local copy of the dataset. TODO: replace it with code that retrieves the data.

In [2]:
# glob() returns matches in a filesystem-dependent order. Sorting makes the
# sample order (and therefore the seeded train/test split) reproducible
# across machines and runs.
audio_files = sorted(glob("data/Emotion_Dataset/*/*.wav"))

## Treating audio

- Load each audio file as a waveform vector, trim the silence, and pad or truncate it to a fixed length. Then use the STFT to convert it into a spectrogram, and collect all the spectrograms to form our X.

In [3]:
TARGET_SAMPLES = 48000  # fixed clip length in samples (about 2.2 s at librosa's default 22,050 Hz load rate)
TRIM_TOP_DB = 35        # level (dB below the peak) under which leading/trailing audio is treated as silence

spectrograms = []
for audio_path in audio_files:
    # Decode the file into a float waveform. The loop variable is deliberately
    # not called `X`, so re-running this cell cannot clobber the feature matrix.
    waveform, sr = librosa.load(audio_path)

    # trim() returns (trimmed_signal, index_interval); only the signal is needed.
    trimmed, _ = librosa.effects.trim(waveform, top_db=TRIM_TOP_DB)

    # Force every clip to exactly TARGET_SAMPLES: truncate long clips, then
    # right-pad with zeros (the pad width is 0 when the clip was truncated).
    clipped = trimmed[:TARGET_SAMPLES]
    fixed_length = np.pad(clipped, (0, TARGET_SAMPLES - clipped.shape[0]))

    # Magnitude of the short-time Fourier transform, with a trailing channel axis.
    stft = librosa.stft(fixed_length)
    spectrogram = tf.expand_dims(tf.abs(stft), axis=2)
    spectrograms.append(spectrogram)

2022-11-22 15:57:35.209214: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-11-22 15:57:35.209313: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-22 15:57:35.209346: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-23E8V41): /proc/driver/nvidia/version does not exist
2022-11-22 15:57:35.210779: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Convert every spectrogram tensor to NumPy and stack them into one array.
X = np.array([spec.numpy() for spec in spectrograms])

In [None]:
# Inspect the dimensions of the feature array (shown as the cell output).
X.shape

## Target

In [4]:
from sklearn.preprocessing import OneHotEncoder

### Extracting the emotion label from each audio file name

In [5]:
# File name without directory or extension. Path(...).stem does not depend on
# the length of the directory prefix, unlike a fixed slice offset.
ids = [Path(audio).stem for audio in audio_files]

new_df = pd.DataFrame({"titulo": ids})

# The emotion code is the third dash-separated field of the file name.
new_df["emotion"] = new_df["titulo"].str.split("-").str[2]
assert new_df["emotion"].notna().all(), "file name with fewer than 3 dash-separated fields"

y = new_df["emotion"]

### One hot encoding y

In [7]:
encoder = OneHotEncoder()
# One output column per distinct emotion code.
a = encoder.fit_transform(new_df[['emotion']])
enc_df = pd.DataFrame(a.toarray())

# Join onto the two base columns only, so re-running this cell does not fail
# on column names left over from a previous join.
new_df = new_df[['titulo', 'emotion']].join(enc_df)

# Target = the one-hot columns alone.
y = new_df.drop(columns=['titulo', 'emotion'])

In [8]:
# Inspect the dimensions of the one-hot target frame (shown as the cell output).
y.shape

(1440, 8)

## split train test

In [12]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


# - Modeling

- Since spectrograms are essentially images, a CNN is a natural choice of model.

In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten,MaxPool2D,Dropout

## model init

In [43]:
# Two Conv2D + MaxPool2D stages followed by a small dense classifier head.
model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=(1025, 94, 1)),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(16, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(16, activation='relu'),
    Dense(8, activation='softmax'),  # 8 softmax output units
])

## model compile

In [44]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(
    optimizer='Adam',
    loss='CategoricalCrossentropy',
    metrics=['accuracy'],
)

In [45]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 1023, 92, 16)      160       
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 511, 46, 16)      0         
 2D)                                                             
                                                                 
 conv2d_11 (Conv2D)          (None, 509, 44, 16)       2320      
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 254, 22, 16)      0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 89408)             0         
                                                                 
 dense_8 (Dense)             (None, 16)               

## model train

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the validation loss has not improved for 5 consecutive epochs and
# restore the weights of the best epoch. "val_loss" is the default monitor;
# it is spelled out here for readability.
es = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Keep the returned History object so the learning curves can be inspected
# afterwards (assigning it also avoids echoing the object repr as cell output).
# 30% of the training data is set aside for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.3,
    epochs=50,
    batch_size=32,
    callbacks=[es],
)


Epoch 1/50


## model evaluating

In [None]:
# Score the model on the held-out test split; evaluate() returns the loss
# followed by the metrics given to compile().
model.evaluate(X_test,y_test)