In [88]:
import pandas as pd
from math import sqrt
from sklearn.preprocessing import scale
from sklearn.linear_model import Lasso, ElasticNet, Ridge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate, KFold

### Load the data
794 songs × 6374 features
table 2, table3 [1], [2]
767 songs with annotation
Select 260 core features to reduce the computing complexity 
Core features: the average and the standard deviation of the 65 LLDs as well as their first-order derivatives

In [89]:
feature = pd.read_csv("./PMEmo/PMEmo2019/features/static_features.csv")
annotation = pd.read_csv("./PMEmo/PMEmo2019/annotations/static_annotations_std.csv")

In [90]:
data = pd.merge(feature, annotation)
col = [0] * 260
for i in range(130):
    col[i*2] = 16 + i*31
    col[i*2+1] = 19 + i*31
X  = scale(data.iloc[:,col].values)
y_a = data.iloc[:, 6373].values
y_v = data.iloc[:, 6374].values

### Setup Metric and Regression Model

In [91]:
def rmse(y, y_pred):
    return sqrt(mean_squared_error(y, y_pred))

In [92]:
Regressors = {
    'Lasso': Lasso(alpha=0.01, tol=0.001),
    'ElasticNet': ElasticNet(alpha=0.05),
    'Ridge': Ridge(alpha=0.05, tol=0.001),
    'kNN': KNeighborsRegressor(p=1),
    'SVRrbf': SVR(kernel='rbf', tol=0.0001, C=0.5),
    'SVRpoly': SVR(kernel='poly', C=1, degree=2),
    'SVRlinear': SVR(kernel='linear', C=1),
    'DT': DecisionTreeRegressor(max_depth=10),
    'RF': RandomForestRegressor(max_depth=10, n_estimators=10, max_features=1)
}

### Recognition and Evaluation

In [97]:
def cross_val_regression(regressors, features, labels):
    columns = list(Regressors.keys())
    res = pd.DataFrame(columns=columns, index=['RMSE'])

    for name, reg in Regressors.items():
        #scorer = ('neg_mean_squared_error')
        reg_score = cross_validate(reg, 
                                   features, labels, 
                                   scoring={'rmse': make_scorer(rmse)}, 
                                   cv=10, 
                                   return_train_score=False,
                                   error_score='raise') 
        res.loc['RMSE', name] = reg_score['test_rmse'].mean()

    res['Mean'] = res.mean(axis=1)
    res['std'] = res.std(axis=1)
    return res

In [None]:
#scores_Arousal = cross_val_regression(regressors, X, y_a)

In [99]:
scores_Valence = cross_val_regression(regressors, X, y_v)
print(scores_Valence)

          Lasso ElasticNet     Ridge        kNN     SVRrbf    SVRpoly  \
RMSE  0.0473094  0.0473094  0.065486  0.0495347  0.0510637  0.0514068   

      SVRlinear         DT         RF      Mean       std  
RMSE  0.0507135  0.0519143  0.0462097  0.051216  0.005404  
