# EMOTION RECOGNITION THROUGH SPEECH (A naive attempt)

The aim of this project was neither to solve the problem of emotion recognition nor to build a highly accurate model, but rather to show that the problem is solvable. It is a naive first attempt and may contain a few mistakes here and there.

### Loading a lot of modules

In [None]:
import scipy.io.wavfile as wav
import numpy as np
import librosa
import glob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score as cv
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression  as LR
from sklearn.neural_network import MLPClassifier as MLP
from sklearn.preprocessing import MinMaxScaler as scaler
from sklearn.neighbors import KNeighborsClassifier as KNN
import lightgbm as lgb
from xgboost import XGBClassifier as XGB
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)
import seaborn as sns
from scipy import stats
from scipy.stats import norm
from scipy.signal import stft
import pickle
# Shared MinMaxScaler instance (imported above as `scaler`); it is fitted on the
# training split and then reused to transform the test split further down.
scale = scaler()

In [None]:
# Target labels for the emotion directories found under ./wav_hs.
#
# `files` holds the directories that have a known label; `emotions` maps each
# of those paths to its class id (happy = 1, sadness = 0). The keys are the
# exact strings returned by glob, so a later `emotions[path]` lookup works with
# either path separator ('\\' on Windows, '/' elsewhere) instead of raising
# KeyError on non-Windows systems.
import os

label_by_emotion = {'happy': 1, 'sadness': 0}
# Directories without a label are skipped rather than crashing the lookup;
# sorting makes the sample order independent of the filesystem.
files = sorted(path for path in glob.glob("./wav_hs/*")
               if os.path.basename(path) in label_by_emotion)
emotions = {path: label_by_emotion[os.path.basename(path)] for path in files}

In [None]:
# Creating features to train our predictive model.
#
# Every clip becomes one 112-value vector built only from MFCCs:
#   * 4 per-coefficient statistics (trimmed mean, max, min, variance) of the
#     13 MFCCs and of their time derivative (delta)      -> 8 * 13 = 104 values
#   * mean / max / min / variance of each of the two mean vectors -> 8 values

N_MFCC = 13
DELTA_WIDTH = 9  # librosa's default delta window, in frames


def extract_features(path):
    """Return the 112-dimensional MFCC feature vector for the WAV file at *path*."""
    sample_rate, signal = wav.read(path)
    # wav.read yields integer PCM for most files; librosa only accepts floats.
    if not np.issubdtype(signal.dtype, np.floating):
        signal = signal.astype(np.float32)
    # Multi-channel files are returned as (n_samples, n_channels): mix to mono.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=N_MFCC)
    # The delta must be taken along time, which is the last axis of librosa's
    # (n_mfcc, n_frames) output. Computing it after the transpose (as before)
    # differentiated across the 13 coefficients instead.
    # The default 'interp' mode needs at least DELTA_WIDTH frames, so very
    # short clips fall back to edge padding.
    delta_kwargs = {} if mfcc.shape[1] >= DELTA_WIDTH else {"mode": "nearest"}
    delta = librosa.feature.delta(mfcc, **delta_kwargs)

    libceps = mfcc.T       # one row per frame
    mfcc_delta = delta.T
    num_ceps = len(libceps)
    # The means ignore the first and last 10% of the frames.
    core = slice(int(num_ceps / 10), int(num_ceps * 9 / 10))

    def per_coefficient_stats(frames):
        return (np.mean(frames[core], axis=0), frames.max(axis=0),
                frames.min(axis=0), frames.var(axis=0))

    def across_coefficients(vector):
        return (np.mean(vector), vector.max(), vector.min(), vector.var())

    mfcc_stats = per_coefficient_stats(libceps)
    delta_stats = per_coefficient_stats(mfcc_delta)
    mfcc_mean, mfcc_delta_mean = mfcc_stats[0], delta_stats[0]
    return np.hstack((*mfcc_stats, *delta_stats,
                      *across_coefficients(mfcc_mean),
                      *across_coefficients(mfcc_delta_mean)))


data = []
target = []
for file in files:
    # Each entry of `files` is an emotion directory holding that class's clips.
    for audio_path in glob.glob(file + "/*"):
        data.append(extract_features(audio_path))
        target.append(emotions[file])

In [None]:
# Freeze the accumulated Python lists as NumPy arrays and report the
# feature-vector length together with the number of clips per class.
data, target = np.array(data), np.array(target)
happy_count = len(target[target == 1])
sad_count = len(target[target == 0])
print("Size of Feature Vector: ", len(data[0]),
      "\nNumber of 'Happy' Examples: ", happy_count,
      "\nNumber of 'Sad' Examples: ", sad_count)

In [None]:
# Use 60% of the clips for training (fixed seed so the split is repeatable),
# then min-max scale both splits with the ranges learned from the training
# split only, so nothing about the test split leaks into the scaler.
splits = train_test_split(data, target, train_size=0.6, random_state=0)
X_train, X_test, y_train, y_test = splits
X_train = scale.fit(X_train).transform(X_train)
X_test = scale.transform(X_test)

In [None]:
# Skewness of the training dataset: mean absolute skewness over all feature
# columns. The number of columns is taken from X_train itself rather than
# being hard-coded, so this keeps working if the feature vector changes size.
from scipy.stats import skew

abs_skew = np.abs(skew(X_train, axis=0))  # one value per feature column
print(abs_skew.mean())

In [None]:
# LightGBM datasets. The validation set is declared with the training set as
# its reference so both share the same feature binning.
# NOTE(review): the test split doubles as the early-stopping validation set,
# so scores reported on it afterwards are optimistic.
dtrain = lgb.Dataset(X_train, y_train)
dval = lgb.Dataset(X_test, y_test, reference=dtrain)
params = {'num_leaves' : 256,
         'learning_rate':0.03,
         # LightGBM has no 'accuracy' metric; 'binary_error' is 1 - accuracy.
         'metric':'binary_error',
         'objective':'binary',
         'early_stopping_round': 40,
         'max_depth':8,
         'bagging_fraction':0.5,
         # Bagging is only performed when bagging_freq is non-zero; without it
         # bagging_fraction / bagging_seed are silently ignored.
         'bagging_freq':1,
         'feature_fraction':0.6,
         'bagging_seed':2017,
         'feature_fraction_seed':2017,
         'verbose' : 1,
        }

In [None]:
# Train for at most 500 boosting rounds, evaluating on the training and
# validation sets and logging the metric every 25 rounds.
# `verbose_eval` was removed from lgb.train in LightGBM 4.0; newer releases
# expose the same behaviour through the log_evaluation callback, so use
# whichever one the installed version provides.
if hasattr(lgb, "log_evaluation"):
    log_kwargs = {"callbacks": [lgb.log_evaluation(period=25)]}
else:
    log_kwargs = {"verbose_eval": 25}
clf = lgb.train(params, dtrain, num_boost_round=500,
                valid_sets=[dtrain, dval], **log_kwargs)

In [None]:
from sklearn.metrics import log_loss

# Predict once and reuse the scores; they are treated as P(class == 1) both by
# log_loss and by the 0.5 threshold below.
proba = clf.predict(X_test)
print(log_loss(y_test, proba))
# Accuracy: share of test clips whose thresholded prediction matches the label.
# (The previous ratio "predicted positives / actual positives" is not a valid
# score: it can exceed 1 and divides by zero when the split has no positives.)
print(np.mean((proba >= 0.5) == (y_test == 1)))

In [None]:
# For building our predictive model with other algos.
#
# Exactly one candidate must be active: cross_val_score needs a scikit-learn
# estimator, and the LightGBM Booster returned by lgb.train is not one (it
# cannot be cloned or re-fitted per fold). The random forest is the default
# because it is insensitive to feature scaling, and the folds below are built
# from the unscaled `data`. Swap in another line to try a different model.

#clf = LR(penalty='l2',C=0.01,max_iter=100)
#clf = SVC(kernel='linear',C=0.01,probability=True,random_state=0)
clf = RFC(n_estimators=50,max_depth=12,random_state=0)
#clf = MLP(hidden_layer_sizes=[1,10,100],solver='adam',random_state=0)
#clf = KNN(n_neighbors=5)
scores = cv(clf,data,target,cv=10) # 10-fold CV, used for parameter optimization
print(scores)
print(np.mean(scores))

In [None]:
# Fit the selected model on the scaled training split.
# NOTE(review): this needs `clf` to expose a scikit-learn style `fit`; the
# object returned by lgb.train presumably does not - confirm which model is
# bound to `clf` before running this cell.
clf.fit(X_train,y_train)

In [None]:
# Persist the trained model. The context manager closes the file even if
# pickling fails part-way through.
# NOTE(review): only `clf` is stored; the fitted scaler `scale` is not, so
# anything loading this file must scale its inputs the same way - confirm.
with open("./trained.pickle", "wb") as save:
    pickle.dump(clf, save)