In [1]:
import pandas as pd
from math import sqrt
from scipy.stats import pearsonr
from sklearn.preprocessing import scale
from sklearn.linear_model import Lasso, ElasticNet, Ridge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import cross_validate, KFold

### Load the data
The dataset contains 794 songs × 6374 features.  
767 of the songs have annotations.  
260 core features are selected to reduce the computational cost.  
Core features: the mean and the standard deviation of the 65 LLDs (low-level descriptors) as well as of their first-order derivatives.

In [2]:
# Locations of the PMEmo2019 static feature table and static annotation table.
feature_csv = "./PMEmo/PMEmo2019/features/static_features.csv"
annotation_csv = "./PMEmo/PMEmo2019/annotations/static_annotations_std.csv"

feature = pd.read_csv(feature_csv)
annotation = pd.read_csv(annotation_csv)

In [3]:
# Join features and annotations on the column(s) they share (pd.merge defaults
# to an inner join on the common column names).
data = pd.merge(feature, annotation)

# Positional indices of the selected feature columns: the offsets 16 and 19
# repeated with a stride of 31 over 118 groups, i.e. 236 columns, ordered
# 16, 19, 47, 50, ...
# NOTE(review): 236 columns are selected here, whereas the notebook text
# mentions 260 core features — confirm which one is intended.
col = [offset + 31 * group for group in range(118) for offset in (16, 19)]

# Standardise the selected columns and pull out the two regression targets.
X = scale(data.iloc[:, col].values)
y_a = data['Arousal(mean)'].values
y_v = data['Valence(mean)'].values

### Setup Metric and Regression Model
Metric: (1) Root Mean Square Error (RMSE); (2) Pearson Correlation Coefficient (PCC) — only RMSE is reported in the score tables below  
Regressor: (1) Lasso (2) Ridge (3) Elastic Net (4) SVR (kernel = rbf/linear/poly) (5) Decision Tree (6) Random Forest

In [4]:
def rmse(y, y_pred):
    """Return the root mean square error between targets `y` and predictions `y_pred`."""
    mse = mean_squared_error(y, y_pred)
    return sqrt(mse)

def pcc(y, y_pred):
    """Return the Pearson correlation coefficient between `y` and `y_pred`.

    Only the coefficient is returned; the p-value computed by `pearsonr`
    is discarded.
    """
    coefficient, _p_value = pearsonr(y, y_pred)
    return coefficient

# Candidate models, keyed by the name used as the column header in the score
# tables. Hyper-parameters are the scikit-learn defaults unless given explicitly.
Regressors = {
    'Lasso': Lasso(),
    'Ridge': Ridge(),
    'ElasticNet': ElasticNet(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    # The tree-based models are stochastic; fix the seed so that re-running the
    # notebook reproduces the same scores.
    'DT': DecisionTreeRegressor(max_depth=5, random_state=0),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1,
                                random_state=0)
}

### Recognition and Evaluation

In [5]:
def cross_val_regression(regressors, features, labels):
    """Evaluate each regressor with 10-fold cross-validation and tabulate its RMSE.

    Parameters
    ----------
    regressors : dict
        Mapping from a display name to a scikit-learn regressor.
    features : array-like
        Feature matrix handed to ``cross_validate`` as ``X``.
    labels : array-like
        Target values handed to ``cross_validate`` as ``y``.

    Returns
    -------
    pandas.DataFrame
        One column per regressor name and a single ``'RMSE'`` row holding the
        test RMSE averaged over the folds, rounded to 3 decimals.

    Raises
    ------
    Exception
        Any error raised while fitting or scoring a fold is propagated
        (``error_score='raise'``).
    """
    # Iterate over the `regressors` argument rather than the module-level
    # `Regressors` dict, so the caller decides which models are evaluated.
    res = pd.DataFrame(columns=list(regressors.keys()))
    scorer = {'rmse': make_scorer(rmse)}

    for name, reg in regressors.items():
        reg_score = cross_validate(reg,
                                   features,
                                   labels,
                                   scoring=scorer,
                                   cv=10,
                                   return_train_score=False,
                                   error_score='raise')
        res.loc['RMSE', name] = round(reg_score['test_rmse'].mean(), 3)
        # PCC is not reported here: only the 'rmse' scorer is registered above.
    return res

In [7]:
# Cross-validated RMSE of every regressor on the arousal labels.
scores_Arousal = cross_val_regression(regressors=Regressors, features=X, labels=y_a)
scores_Arousal

Unnamed: 0,Lasso,Ridge,ElasticNet,SVRrbf,SVRlinear
RMSE,0.184,0.216,0.184,0.146,0.226


In [9]:
# Cross-validated RMSE of every regressor on the valence labels.
scores_Valence = cross_val_regression(regressors=Regressors, features=X, labels=y_v)
scores_Valence

Unnamed: 0,Lasso,Ridge,ElasticNet,SVRrbf,SVRlinear
RMSE,0.162,0.196,0.162,0.141,0.209
