In [1]:
import dill, csv, os
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
import xgboost as xgb
import librosa
import numpy as np
import dill
from tqdm import tqdm
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and data-conversion warnings
# raised by the libraries used below; consider a narrower filter.
warnings.filterwarnings('ignore')

# Shared random seed; passed to KFold and to the XGBClassifier parameters below.
seed = 3

In [2]:
# load data
# NOTE(review): dill.load can execute arbitrary code while unpickling, so only
# load 'voice_data.joblib' when it comes from a trusted source.
# The context manager closes the file handle; a bare open() call would leave it open.
with open('voice_data.joblib', 'rb') as voice_file:
    voice_data = dill.load(voice_file)

train_X = voice_data.train_X
train_Y = voice_data.train_Y
train_sounds = voice_data.train_sounds

public_test_X = voice_data.public_test_X
public_test_Y = voice_data.public_test_Y
public_test_sounds = voice_data.public_test_sounds

private_test_X = voice_data.private_test_X
private_test_Y = voice_data.private_test_Y
private_test_sounds = voice_data.private_test_sounds

# Indices of the training rows whose raw label (still on the 1~5 scale) is 3 or 4.
to_add = [i for i in range(len(train_X)) if train_Y[i][0] in (3, 4)]

# train_features is only a placeholder here; it is reassigned from get_feature() later.
train_features = np.array(train_X)
train_Y = np.array(train_Y)

# Oversampled copies of the raw rows/labels: each selected row is appended twice.
# NOTE(review): train_X_ / train_Y_ are not read by any later cell visible in
# this notebook; they are kept so that nothing relying on them breaks.
train_X_ = list(train_X)
train_Y_ = list(train_Y)
for i in to_add:
    train_X_.extend([train_X[i]] * 2)
    train_Y_.extend([train_Y[i]] * 2)

# turn 1~5 to 0~4
train_Y = [i-1 for i in train_Y]
public_test_Y = [i-1 for i in public_test_Y]
private_test_Y = [i-1 for i in private_test_Y]

In [3]:
import statsmodels.api as statsmodels
from hurst import compute_Hc
from librosa.feature import mfcc

# feature function
def get_mfccs(sound, sr=44100, n_mfcc=13):
    '''
    Input: sound(1D array), sr(sampling rate in Hz), n_mfcc(number of coefficients)
    Output: MFCCs averaged over the time frames(1D array)

    The defaults (44100 Hz, 13 coefficients) are the values this function
    used before they became parameters.
    '''
    mfccs = mfcc(y=sound, sr=sr, n_mfcc=n_mfcc)
    # average along axis 1 so each coefficient row collapses to a single value
    return np.mean(mfccs, axis=1)

# AR parameters for the sound
def get_ar(sound, order=14):
    '''
    Input: sound(1D array), order(number of AR coefficients, default 14)
    Output: AR parameters(1D array)

    The default order of 14 is the value this function used before it
    became a parameter.
    '''
    # burg returns a tuple; only its first element (the AR parameters) is kept
    AR = statsmodels.regression.linear_model.burg(sound, order)[0]
    return AR

# Hurst Exponent 
def get_hurst(sound):
    '''
    Input: sound(1D array)
    Output: Hurst exponent wrapped in a length-1 array, ready to be concatenated
    '''
    # compute_Hc yields three values; only the first one (H) is used here
    hurst_exponent, _, _ = compute_Hc(sound, kind='random_walk', simplified=True)
    return np.array([hurst_exponent])

In [4]:
def merge_feature(feature1, feature2):
    '''
    Input: feature1, feature2(1D array)
    Output: feature1 followed by feature2 in a single 1D array
    '''
    return np.concatenate([feature1, feature2], axis=0)

def get_feature(rows, sounds):
    '''
    Input: rows(sequence of 1D arrays), sounds(sequence of 1D arrays, same order as rows)
    Output: list of 1D arrays, each being row + Hurst exponent + AR parameters
    '''
    features = []
    for idx in tqdm(range(len(rows))):
        sound = sounds[idx]
        # append the Hurst exponent first, then the AR parameters
        with_hurst = merge_feature(rows[idx], get_hurst(sound))
        features.append(merge_feature(with_hurst, get_ar(sound)))
    return features

# Build the training features: every row of train_X extended with the Hurst
# exponent and the AR parameters of its sound.
# Slow step: the recorded run took about 10 minutes for 1000 sounds.
train_features = get_feature(train_X, train_sounds)

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [10:24<00:00,  1.60it/s]


In [5]:
# 5-fold splitter; shuffling with the fixed seed keeps the fold assignment reproducible
kf = KFold(n_splits=5, shuffle=True, random_state=seed)

In [6]:
# tally how many training samples carry each label (first entry of every train_Y row)
count = {}
for i in train_Y:
    count[i[0]] = count.get(i[0], 0) + 1

print(count)

{1: 220, 4: 32, 0: 536, 2: 168, 3: 44}


In [7]:
# hyper-parameters for the gradient-boosted classifier, one per line for easy tuning
param = dict(
    max_depth=12,
    learning_rate=0.15,
    objective='multi:softmax',
    colsample_bytree=0.8,
    random_state=seed,
)
model = xgb.XGBClassifier(**param)

In [8]:
# Drop the first column of every feature vector before training
# (presumably the sample id, since predict() reads column 0 as `ids` — confirm).
# NOTE(review): not idempotent — re-running this cell strips one more column.
train_features = [i[1:] for i in train_features]

In [9]:
# Oversample the two rarest classes: every training sample labelled 3 or 4
# (on the shifted 0~4 scale) gets two extra copies appended.
# NOTE(review): the copies are added before the K-fold split, so the same
# sample can land in both the training and the evaluation part of a fold,
# which inflates the reported scores for classes 3 and 4. Consider
# oversampling inside each fold instead.
n_original = len(train_X)
to_add = [i for i in range(n_original) if train_Y[i][0] in (3, 4)]

# Trim back to the original samples first, so re-running this cell does not
# stack another round of duplicates on top of the previous one.
train_features = list(train_features)[:n_original]
train_Y = list(train_Y)[:n_original]
for i in to_add:
    train_features.extend([train_features[i]] * 2)
    train_Y.extend([train_Y[i]] * 2)

In [10]:
def train_model(model, refit_on_all=True):
    '''
    Input: model(classifier exposing fit/predict),
           refit_on_all(bool, fit once more on every sample after cross-validation)
    Output: model(fitted)

    Prints one classification report per fold of the module-level `kf`
    splitter, using the module-level `train_features` and `train_Y`.
    With refit_on_all=False the returned model is whatever the last fold's
    fit produced, i.e. it has only seen that fold's training split.
    '''
    # Convert once up front instead of rebuilding both arrays on every fold.
    X_all = np.array(train_features)
    y_all = np.array(train_Y)

    for train_index, test_index in kf.split(X_all):
        model.fit(X_all[train_index], y_all[train_index])
        y_pred = model.predict(X_all[test_index])
        print(classification_report(y_all[test_index], y_pred))

    if refit_on_all:
        # The folds are for evaluation; the model used for prediction should see all data.
        model.fit(X_all, y_all)

    return model

# Fit and report on each KFold split, then keep the returned classifier for the prediction cells.
model = train_model(model)

              precision    recall  f1-score   support

           0       0.72      0.80      0.76       107
           1       0.57      0.43      0.49        49
           2       0.68      0.50      0.58        30
           3       0.86      1.00      0.92        24
           4       0.88      1.00      0.93        21

    accuracy                           0.72       231
   macro avg       0.74      0.75      0.74       231
weighted avg       0.71      0.72      0.71       231

              precision    recall  f1-score   support

           0       0.74      0.87      0.80       107
           1       0.53      0.33      0.41        48
           2       0.71      0.61      0.66        33
           3       0.93      1.00      0.96        27
           4       0.84      1.00      0.91        16

    accuracy                           0.74       231
   macro avg       0.75      0.76      0.75       231
weighted avg       0.72      0.74      0.73       231

              precisio

In [11]:
def predict(row, sound):
    '''
    Input: row(sequence of 1D arrays), sound(sequence of 1D arrays, same order as row)
    Output: ids(list, first column of each feature vector), y_pred(predictions from the module-level model)
    '''
    extracted = get_feature(row, sound)
    # column 0 is returned as the id; the remaining columns are the model inputs
    ids = [vec[0] for vec in extracted]
    model_inputs = np.array([vec[1:] for vec in extracted])
    return ids, model.predict(model_inputs)

In [12]:
ids, y_pred = predict(private_test_X, private_test_sounds)
# save to csv: one "id,prediction" row per sample
# NOTE(review): predictions are on the shifted 0~4 label scale — confirm that
# whoever consumes this file does not expect the original 1~5 labels.
with open('private_pred.csv', 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(zip(ids, y_pred))


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [04:21<00:00,  1.91it/s]


In [13]:
ids, y_pred = predict(public_test_X, public_test_sounds)
# save to csv: one "id,prediction" row per sample
# NOTE(review): predictions are on the shifted 0~4 label scale — confirm that
# whoever consumes this file does not expect the original 1~5 labels.
with open('public_pred.csv', 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(zip(ids, y_pred))


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [04:54<00:00,  1.70it/s]
