In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
def ccc(x,y):
    '''Concordance Correlation Coefficient (Lin's CCC) of two 1-D samples.

    Parameters
    ----------
    x, y : array-like
        Paired observations of equal length (list, ndarray or pandas Series).
        Values are paired by position; any pandas index is ignored.

    Returns
    -------
    numpy.float64
        2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))**2), using
        population (ddof=0) moments. NaN inputs propagate to the result.

    Raises
    ------
    ValueError
        If x and y do not contain the same number of values.
    '''
    # Work on plain float arrays: pandas arithmetic aligns on index labels, so
    # two Series with different indexes would silently pair the wrong values
    # (or produce only NaN, which the sum then treats as zero).
    xArr = np.asarray(x, dtype=float).ravel()
    yArr = np.asarray(y, dtype=float).ravel()
    if xArr.shape[0] != yArr.shape[0]:
        raise ValueError(
            'ccc expects inputs of equal length, got %d and %d'
            % (xArr.shape[0], yArr.shape[0])
        )
    xMean = xArr.mean()
    yMean = yArr.mean()
    # Population covariance, consistent with the ddof=0 variances below
    sxy = np.mean((xArr - xMean) * (yArr - yMean))
    rhoc = 2*sxy / (xArr.var() + yArr.var() + (xMean - yMean)**2)
    return rhoc

In [None]:
def dispCCC(df):
    """Print a 2x2, tab-separated CCC table for the first two columns of ``df``.

    The diagonal is printed as the literal '1.0000'; the off-diagonal cells
    hold the CCC between the two columns, formatted with 4 digits of precision.
    """
    firstName, secondName = df.columns[0], df.columns[1]
    score = ccc(df.loc[:, firstName], df.loc[:, secondName])
    scoreText = np.array2string(score, precision=4)

    unity = '1.0000'
    gap = '\t\t'
    # Header row, then one row per column
    print(gap + firstName + gap + secondName)
    print(firstName + gap + unity + gap + scoreText)
    # The second row label is followed by a single tab only
    print(secondName + '\t' + scoreText + gap + unity)

## Use existing submission result


In [None]:
# CSV of previously exported results, stored inside the model's output directory
submissionPath = 'outputFile/models/[202109160318]EsModel/es2jSubmission.csv'
# The following cells read the 'Arousal' and 'Prediction' columns of this frame
submission = pd.read_csv(submissionPath)

In [None]:
# Line plot of the actual and predicted series on shared axes.
# The label slice 1: leaves out the row labelled 0.
plotColumns = ['Arousal', 'Prediction']
submission.loc[1:, plotColumns].plot(
    figsize=(36, 24),
    title='Actual VS Prediction',
    fontsize=16,
)
plt.show()

In [None]:
# Restrict every metric to the two columns of interest. dispCCC scores the
# first two columns of the frame it receives, so passing the raw CSV frame
# would score the wrong pair if the file carries any extra leading column
# (for example a saved index).
d0 = submission[['Arousal', 'Prediction']]

correlation = d0.corr(method='pearson')
print('Pearson Correlation')
print(correlation)
print()
print('CCC')
dispCCC(d0)

plt.ioff()
# sns.pairplot is a figure-level function: it always creates its own figure,
# so a figure made beforehand with plt.figure() stays blank and its suptitle
# is never shown with the scatter matrix. Size the grid through `height`
# (2 x 2 facets of 12 in give the intended 24 x 24 in) and title the figure
# that pairplot returns.
grid = sns.pairplot(d0, kind='scatter', height=12)
fig = grid.fig
fig.suptitle('Actual Prediction Correlation', fontsize=16)
# Leave room above the facets so the title does not overlap them
fig.subplots_adjust(top=0.95)
plt.show()

## Use the model to predict


In [None]:
# Location of the saved Keras model that keras.models.load_model reads below
modelPath = 'outputFile/models/[202109160318]EsModel'
# CSV with the evaluation recording; the same path is read when the testing dataset is built
testDataset = 'inputFile/modelInput/jlco0000st.csv'

In [None]:
# prepare data for lstms
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised-learning table.

    Parameters
    ----------
    data : list, list of rows, 2-D ndarray or DataFrame
        Observations ordered in time, one row per time step. A flat list or
        1-D array is treated as a single variable.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) to include as inputs.
    n_out : int
        Number of steps (t ... t+n_out-1) to include from the current step on.
    dropnan : bool
        Drop the rows that contain NaN, including those created by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns named 'var<j>(t-<i>)', 'var<j>(t)' and 'var<j>(t+<i>)',
        ordered from the oldest lag to the furthest forecast step, with all
        variables of a step kept together.
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself: guessing it from the
    # input type breaks for a list of rows (name/column count mismatch) and
    # for 1-D arrays (no second axis).
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values (the original row labels are kept)
    if dropnan:
        agg = agg.dropna()
    return agg

# Define scaler, feature number and number of step looking back
scale_range = (0, 1)
# Min-max scaler; it is fitted on the feature columns when the testing dataset is built
scaler = MinMaxScaler(feature_range=scale_range)
n_steps = 24  # exclude the current step
n_features = 7  # feature columns per time step; used when reshaping the model input to 3D

# NOTE(review): usingJL is not referenced in any of the visible cells - confirm it is still needed
usingJL = False
# When True, the model output is treated as min-max scaled and is inverse-transformed before scoring
transformTarget = True

In [None]:
# Read the evaluation recording from the path configured in `testDataset`
# instead of repeating the literal path here.
rawTestingDataset = pd.read_csv(testDataset)
featureColumns = ['RMS', 'F0', 'MFCC1', 'MFCC2', 'MFCC3', 'MFCC4', 'MFCC5']
assert len(featureColumns) == n_features, 'n_features must match the selected feature columns'
testingDataset = rawTestingDataset[featureColumns]
print(testingDataset.head(5))

# load and build testing dataset
values = testingDataset.values
# normalize features
# NOTE(review): the scaler is fitted on the evaluation data itself; confirm
# this matches how the features were scaled when the model was trained.
testingScaled = scaler.fit_transform(values)
# frame as supervised learning
reframed = series_to_supervised(testingScaled, n_steps, 1)
print(reframed.shape)
values = reframed.values
test = values

# Take the target from exactly the rows that survived the framing. Without
# missing values this equals skipping the first n_steps rows, and it stays
# aligned when dropna() removes additional windows because of NaN features.
targetOfTestingDatasest = rawTestingDataset['Arousal'].iloc[reframed.index]

test_X = test
test_y = targetOfTestingDatasest

# reshape input to be 3D [samples, timesteps (n_steps before + 1 current step), features]
test_X = test_X.reshape((test_X.shape[0], n_steps + 1, n_features))

In [None]:
# Load the model
# modelPath is the saved-model location configured earlier in the notebook
model = keras.models.load_model(modelPath)

In [None]:
# make a prediction
if transformTarget:
    inv_yPredict = model.predict(test_X)
    # `scaler` was fitted on the n_features input columns, so it cannot
    # inverse-transform a single-column prediction (the shapes do not match).
    # Use a dedicated one-column scaler for the target instead.
    # NOTE(review): this scaler is fitted on the evaluation target because the
    # training-time target scaler is not available in this notebook; replace
    # it with the scaler used during training if that one was persisted.
    targetScaler = MinMaxScaler(feature_range=scale_range)
    targetScaler.fit(np.asarray(test_y, dtype=float).reshape(-1, 1))
    # inv transform the predicted value
    yPredict = targetScaler.inverse_transform(inv_yPredict.reshape(-1, 1))
    yPredict = yPredict[:, 0]
else:
    # Flatten to 1-D so both branches hand the same shape to the cells below
    yPredict = model.predict(test_X).reshape(-1)

# actual value
yActual = test_y
# calculate RMSE
rmse = np.sqrt(mean_squared_error(yActual, yPredict))
print('Test RMSE: %.3f' % rmse)

In [None]:
# Coefficient of determination (R^2) of the predictions against the actual values;
# left as the cell's last expression so the notebook displays the score
r2_score(yActual, yPredict)

In [None]:
# Flatten the predictions before building the frame: iterating over a 2-D
# (n, 1) model output would store one-element arrays in the column, which can
# be neither plotted nor correlated.
pred_test_list = np.ravel(yPredict).tolist()
actualVsPrediction = pd.DataFrame({'Arousal': yActual, 'Prediction': pred_test_list})
# .loc[1:] is a label-based slice; when yActual is a Series the frame inherits its index
actualVsPrediction.loc[1:, ['Arousal', 'Prediction']].plot(figsize=(36, 24), title='Actual VS Prediction', fontsize=16)
plt.show()

In [None]:
# Agreement between the actual and the predicted series
d0 = actualVsPrediction[['Arousal', 'Prediction']]

correlation = d0.corr(method='pearson')
print('Pearson Correlation')
print(correlation)
print()
print('CCC')
dispCCC(d0)

plt.ioff()
# sns.pairplot is a figure-level function: it always creates its own figure,
# so a figure made beforehand with plt.figure() stays blank and its suptitle
# is never shown with the scatter matrix. Size the grid through `height`
# (2 x 2 facets of 12 in give the intended 24 x 24 in) and title the figure
# that pairplot returns.
grid = sns.pairplot(d0, kind='scatter', height=12)
fig = grid.fig
fig.suptitle('Actual Prediction Correlation', fontsize=16)
# Leave room above the facets so the title does not overlap them
fig.subplots_adjust(top=0.95)
plt.show()