In [3]:
import pandas as pd
import os
import numpy as np
from math import sqrt

from sklearn.linear_model import Lasso, ElasticNet, Ridge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate, KFold
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.pipeline import make_pipeline
# from nltk.stem.snowball import SnowballStemmer
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import TruncatedSVD




In [4]:
# Root folder of the PMEmo2019 download. Set the PMEMO2019_DIR environment
# variable to point at a copy stored elsewhere; the fallback is the path the
# notebook was originally written against.
DATASET_DIR_2 = os.environ.get('PMEMO2019_DIR', 'C:\\Users\\asus\\Desktop\\PMEmo2019\\')

# Both sub-folders are derived from the root so only one path ever has to change.
# The trailing '' component keeps a directory separator at the end of FEATURES,
# matching the value this constant had when it was spelled out by hand.
FEATURES = os.path.join(DATASET_DIR_2, 'PMEmo2019', 'features', '')
ANNOTATIONS = os.path.join(DATASET_DIR_2, 'PMEmo2019', 'annotations')




In [5]:

def load_static_features():
    """Read the static audio feature table.

    Returns:
        DataFrame loaded from ``static_features.csv`` inside the ``FEATURES``
        folder, indexed by the first column of that file.
    """
    return pd.read_csv(os.path.join(FEATURES, 'static_features.csv'), index_col=0)

def _load_static_features_and_label(label_column):
    """Join the static feature table with a single annotation column.

    Args:
        label_column: Name of the column to read from ``static_annotations.csv``
            (for example ``'Valence(mean)'``).

    Returns:
        DataFrame holding every static feature column followed by
        ``label_column`` as the last column. Rows with any missing value
        (including songs that have no annotation) are dropped.
    """
    static_features = load_static_features()
    annotations_csv = os.path.join(ANNOTATIONS, 'static_annotations.csv')
    # Name the index column explicitly: a positional index_col combined with
    # usecols depends on the column order inside the CSV file.
    label = pd.read_csv(annotations_csv, index_col='musicId', usecols=['musicId', label_column])
    return static_features.join(label).dropna()

def load_static_features_and_valence():
    """Return the static features joined with the ``Valence(mean)`` annotation."""
    return _load_static_features_and_label('Valence(mean)')

def load_static_features_and_arousal():
    """Return the static features joined with the ``Arousal(mean)`` annotation."""
    return _load_static_features_and_label('Arousal(mean)')

def load_audio_dataset(data):
    """Split a joined feature/label table into arrays.

    Args:
        data: DataFrame whose last column is the target and whose remaining
            columns are the features.

    Returns:
        Tuple ``(features, labels)`` taken from ``.values`` of the feature
        columns and of the last column respectively. No scaling is applied here.
    """
    column_names = data.columns
    feature_matrix = data[column_names[:-1]].values
    label_vector = data[column_names[-1]].values
    return feature_matrix, label_vector

def rmse(y, y_pred):
    """Root-mean-squared error between the targets ``y`` and predictions ``y_pred``."""
    mse = mean_squared_error(y, y_pred)
    return sqrt(mse)


# Fixed seed so the randomised tree-based models give the same scores on every run.
RANDOM_STATE = 0

# Candidate regressors, keyed by the column label used in the score tables.
regressors = {
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5, random_state=RANDOM_STATE),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1,
                                random_state=RANDOM_STATE),
    # Currently disabled candidates:
    # 'MLP': MLPRegressor(hidden_layer_sizes=(200,50), max_iter=2000),
    # 'AdaBoost': AdaBoostRegressor(n_estimators=10),
}

from tqdm import notebook
import IPython.display as ipd


def cross_val_regression(regressors, features, labels, preprocessfunc, cv=10):
    """Cross-validate every regressor and tabulate its mean test RMSE.

    Args:
        regressors: Mapping of display name -> estimator.
        features: Feature matrix passed to ``cross_validate``.
        labels: Target vector passed to ``cross_validate``.
        preprocessfunc: Sequence of transformers placed in front of each
            estimator via ``make_pipeline``.
        cv: Cross-validation setting forwarded to ``cross_validate``
            (defaults to 10, the value that used to be hard-coded).

    Returns:
        One-row DataFrame (index ``'RMSE'``) with one float column per
        regressor, followed by ``'Mean'`` and ``'std'`` columns summarising
        the regressor columns.
    """
    # The scorer is the same for every model, so build it once outside the loop.
    scoring = {'rmse': make_scorer(rmse)}

    rmse_by_model = {}
    for reg_name, reg in notebook.tqdm(regressors.items(), desc='regressors'):
        model = make_pipeline(*preprocessfunc, reg)
        cv_result = cross_validate(model, features, labels, scoring=scoring, cv=cv,
                                   return_train_score=False)
        rmse_by_model[reg_name] = cv_result['test_rmse'].mean()

    # Build the frame in one go with a float dtype instead of filling an
    # object-dtype frame cell by cell.
    scores = pd.DataFrame(rmse_by_model, index=['RMSE'],
                          columns=list(regressors.keys()), dtype=float)

    # Compute both summaries before adding either column so that neither one
    # is included in the other's calculation.
    mean_rmse = scores.mean(axis=1)
    std_rmse = scores.std(axis=1)

    scores['Mean'] = mean_rmse
    scores['std'] = std_rmse
    return scores


def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
        #         is_max = s == max(s)
        #         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]

    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')


# Preprocessing steps placed in front of every regressor inside its pipeline.
prefunc = [StandardScaler()]

# --- Arousal: load, split into features/labels, cross-validate, display ---
print('In Arousal dimension...')
data_a = load_static_features_and_arousal()
features_a, labels_a = load_audio_dataset(data_a)
scores_a_a = cross_val_regression(
    regressors=regressors,
    features=features_a,
    labels=labels_a,
    preprocessfunc=prefunc,
)
ipd.display(format_scores(scores_a_a))

# --- Valence: same steps with the valence annotation as the target ---
print('In Valence dimension...')
data_v = load_static_features_and_valence()
features_v, labels_v = load_audio_dataset(data_v)
scores_a_v = cross_val_regression(
    regressors=regressors,
    features=features_v,
    labels=labels_v,
    preprocessfunc=prefunc,
)
ipd.display(format_scores(scores_a_v))

In Arousal dimension...

In Valence dimension...



HBox(children=(FloatProgress(value=0.0, description='regressors', max=9.0, style=ProgressStyle(description_wid…

Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.184,0.184,0.14,0.136,0.119,0.227,0.11,0.131,0.158,0.154,0.038


HBox(children=(FloatProgress(value=0.0, description='regressors', max=9.0, style=ProgressStyle(description_wid…

Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.162,0.162,0.163,0.135,0.121,0.225,0.122,0.141,0.141,0.152,0.032
