In [None]:
from platform import python_version
import pandas as pd
import numpy as np 
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import PowerTransformer, RobustScaler, StandardScaler, MinMaxScaler

print(f'python {python_version()}')
print(f'pandas {pd.__version__}')
print(f'numpy {np.__version__}')
from sklearn.model_selection import KFold
from tqdm import tqdm
from sklearn.multioutput import MultiOutputRegressor
import lightgbm as lgb
from scipy import integrate
import seaborn as sns
print(f'lgb {lgb.__version__}')
import operator
import datetime

from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from scipy import stats
from scipy.stats import ks_2samp
import random
import copy

from scipy.stats import norm, kurtosis
from sklearn.metrics import make_scorer
from utils import *

In [None]:
import tensorflow as tf
import keras
import keras.backend as K
from keras import layers, models, optimizers
from keras.layers import Dense, Activation, BatchNormalization, AlphaDropout, Dropout, Add, Concatenate, Flatten, Lambda
from keras.layers import LSTM, GRU, Conv1D, MaxPooling1D, GlobalAveragePooling1D, AveragePooling1D, Bidirectional, GlobalMaxPooling1D
from keras.regularizers import l2
from keras.models import Sequential, Model, Input, load_model

from keras.wrappers.scikit_learn import KerasRegressor


In [None]:
def mish(x):
    """Mish activation: ``x * tanh(softplus(x))``, computed with Keras backend ops."""
    gate = K.tanh(K.softplus(x))
    return x * gate

def custom_x_nn(y_true, y_pred):
    """Return ``0.5 * mean((y_true - y_pred)**2 / 2e+04)`` over all elements."""
    scaled_sq_err = K.square(y_true - y_pred) / 2e+04
    return 0.5 * K.mean(scaled_sq_err)

def custom_y_nn(y_true, y_pred):
    """Return ``0.5 * mean((y_true - y_pred)**2 / 2e+04)`` over all elements.

    The formula is the same as the one used by ``custom_x_nn``.
    """
    scaled_sq_err = K.square(y_true - y_pred) / 2e+04
    return 0.5 * K.mean(scaled_sq_err)

def custom_m_nn(y_true, y_pred):
    """Return half of the mean squared relative error.

    The relative error is ``(y_true - y_pred) / (y_true + 1e-06)``; the small
    constant keeps the denominator away from an exact zero.
    """
    relative_err = (y_true - y_pred) / (y_true + 1e-06)
    return 0.5 * K.mean(K.square(relative_err))

def custom_v_nn(y_true, y_pred):
    """Return half of the mean squared relative error.

    The relative error is ``(y_true - y_pred) / (y_true + 1e-06)``; the formula
    is the same as the one used by ``custom_m_nn``.
    """
    relative_err = (y_true - y_pred) / (y_true + 1e-06)
    return 0.5 * K.mean(K.square(relative_err))


def kaeri_metric_nn(y_true, y_pred):
    """Combine the two error terms with equal weight: ``0.5 * E1 + 0.5 * E2``."""
    first_term = E1(y_true, y_pred)
    second_term = E2(y_true, y_pred)
    return 0.5 * first_term + 0.5 * second_term


### E1 and E2 are defined below ###

def E1(y_true, y_pred):
    """Scaled squared error on the first half of the output columns.

    Both tensors are split into two equal parts along axis 1 and only the
    first part is used. Per row, the squared differences are summed and
    divided by 2e+04; the result is the mean over rows.
    """
    true_head = tf.split(y_true, 2, 1)[0]
    pred_head = tf.split(y_pred, 2, 1)[0]

    per_row = K.sum(K.square(true_head - pred_head), axis=1) / 2e+04
    return K.mean(per_row)


def E2(y_true, y_pred):
    """Squared relative error on the second half of the output columns.

    Both tensors are split into two equal parts along axis 1 and only the
    second part is used. Per row, the squared relative errors
    ``((t - p) / (t + 1e-06))**2`` are summed; the result is the mean over rows.
    """
    true_tail = tf.split(y_true, 2, 1)[1]
    pred_tail = tf.split(y_pred, 2, 1)[1]

    relative_err = (true_tail - pred_tail) / (true_tail + 1e-06)
    return K.mean(K.sum(K.square(relative_err), axis=1))

# Per-column masks for the 4-column model output.
# weight1 keeps columns 0-1 (multiplied into the squared error in my_loss_E1).
weight1 = np.array([1, 1, 0, 0])
# weight2 keeps columns 2-3 (multiplied into the squared relative error in my_loss_E2).
weight2 = np.array([0, 0, 1, 1])

def my_loss(y_true, y_pred):
    """Mean squared relative error over every output element.

    The relative error is ``(y_pred - y_true) / (y_true + 0.000001)``; the
    small constant keeps the denominator away from an exact zero.

    The division is done with plain tensor arithmetic: wrapping it in a Keras
    ``Lambda`` layer would build a new layer object on every loss call without
    changing the result.
    """
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error))


def my_loss_E1(y_true, y_pred):
    """Masked squared-error loss.

    The squared error is multiplied by the module-level ``weight1`` mask,
    averaged over all elements (masked-out columns contribute zeros to the
    mean) and divided by 2e+04.
    """
    masked_sq_err = K.square(y_true - y_pred) * weight1
    return K.mean(masked_sq_err) / 2e+04

def my_loss_E2(y_true, y_pred):
    """Masked squared relative-error loss.

    The relative error ``(y_pred - y_true) / (y_true + 0.000001)`` is squared,
    multiplied by the module-level ``weight2`` mask and averaged over all
    elements (masked-out columns contribute zeros to the mean).

    ``weight2`` is looked up when the function runs, so whatever array is bound
    to that name at that moment is the mask that gets applied.

    The division is done with plain tensor arithmetic: wrapping it in a Keras
    ``Lambda`` layer would build a new layer object on every loss call without
    changing the result.
    """
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error) * weight2)

def get_mfcc_features(train_df):
    """Build one row of MFCC features per ``id``.

    For every ``id`` group and every sensor column ``S1``..``S4`` the sensor
    values are passed to ``get_mfcc_result``; element ``j`` of its result is
    stored in column ``'S<i>mfcc<j>'``.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Must contain an ``id`` column and the columns ``S1``..``S4``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``train_df['id'].unique()`` (order of first appearance),
        one column per sensor/coefficient pair.
    """
    ids, rows = [], []
    # Group by the scalar key 'id': grouping by the list ['id'] makes recent
    # pandas versions yield 1-tuples as group names, which no longer match the
    # plain id labels.
    for name, group in tqdm(train_df.groupby('id')):
        row = {}
        for i in range(1, 5):
            s_data = group['S' + str(i)].values
            mfcc_data = get_mfcc_result(s_data)
            for j in range(len(mfcc_data)):
                row['S' + str(i) + 'mfcc' + str(j)] = mfcc_data[j]
        ids.append(name)
        rows.append(row)

    # Build the frame once instead of enlarging it cell by cell, which
    # re-allocates the frame for every new column.
    new_df = pd.DataFrame(rows, index=ids)
    # groupby iterates in sorted key order; restore order of first appearance.
    return new_df.reindex(train_df['id'].unique())

def repair(data):
    """Return a rescaled copy of ``data``; the input array is left untouched.

    Columns 0 and 1 are multiplied by 400 and column 2 by 100. Any further
    columns are copied unchanged.
    """
    restored = data.copy()
    for columns, factor in ((slice(0, 2), 400), (2, 100)):
        restored[:, columns] *= factor
    return restored

In [None]:
def step_decay(epoch):
    """Step learning-rate schedule.

    Starts at 0.005 and halves every 15 epochs (the step index is
    ``floor((1 + epoch) / 15)``), never dropping below 1e-4.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index.

    Returns
    -------
    float
        Learning rate to use for that epoch.
    """
    # Imported here because no import cell of the notebook imports ``math``
    # explicitly; without this the function depends on a name leaking in from
    # a wildcard import.
    import math

    initial_lrate = 0.005
    drop = 0.5
    epochs_drop = 15.0
    lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

    return max(1e-4, lrate)

# Shared training callbacks.
# Early stopping: give up after 30 epochs without improvement of the monitored
# quantity (Keras default) and roll back to the best weights seen.
es = keras.callbacks.EarlyStopping(
    patience=30,
    restore_best_weights=True,
)
# Learning-rate schedule driven by step_decay.
lrs = keras.callbacks.LearningRateScheduler(schedule=step_decay)

In [None]:
# Load the input tables; paths are relative to the notebook's working directory.
train_features, train_Y, test_features, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/train_features.csv',
        './data/train_target.csv',
        './data/test_features.csv',
        './data/sample_submission.csv',
    )
)


In [None]:
# Only the MFCC features are extracted here. The other feature helpers
# (get_stats_features, get_rolling_features, get_frequency_features,
# get_every_s2) are intentionally not called in this notebook.
mfcc_train, mfcc_test = (
    get_mfcc_features(feature_frame)
    for feature_frame in (train_features, test_features)
)

In [None]:
def _stack_sensor_mfcc(mfcc_frame, sensors=('S1', 'S2', 'S3', 'S4')):
    """Turn a flat MFCC frame into a ``(n_ids, n_coeffs, n_sensors)`` array.

    For every sensor tag the columns whose name contains that tag are selected
    (in their existing order) and the per-sensor matrices are stacked along a
    new last axis, so ``result[k, j, s]`` is coefficient ``j`` of sensor ``s``
    for the ``k``-th row of ``mfcc_frame``.

    The sample and coefficient counts are taken from the data, and the array is
    built in one step rather than by repeatedly re-copying a growing array.
    """
    per_sensor = [
        mfcc_frame[mfcc_frame.columns[mfcc_frame.columns.str.contains(tag)]].values
        for tag in sensors
    ]
    return np.stack(per_sensor, axis=-1)


mfcc_ts_train = _stack_sensor_mfcc(mfcc_train)
mfcc_ts_test = _stack_sensor_mfcc(mfcc_test)

In [None]:
def _fft_imag_features(frame, n_steps=375, n_bins=32, sensors=('S1', 'S2', 'S3', 'S4')):
    """Peak-normalised imaginary FFT part per recording and sensor.

    ``frame`` is cut positionally into consecutive windows of exactly
    ``n_steps`` rows (assumes every consecutive block of ``n_steps`` rows is one
    recording — TODO confirm against the data layout). Each window is
    transformed with an orthonormal FFT; the imaginary part of the first
    ``n_bins`` bins is divided by its largest absolute value.

    Train and test must be windowed the same way. Label-based slicing such as
    ``.loc[a:b]`` includes the endpoint ``b`` and would produce 376-row windows
    that also contain the first sample of the next recording.

    Returns an array of shape ``(n_recordings, n_bins, len(sensors))``.
    """
    signals = frame[list(sensors)].values
    n_recordings = len(signals) // n_steps
    windows = signals[:n_recordings * n_steps].reshape(n_recordings, n_steps, len(sensors))

    spectrum = np.fft.fft(windows, axis=1, norm='ortho')
    part = spectrum.imag[:, :n_bins, :]

    # Normalise every (recording, sensor) series by its own peak. An all-zero
    # series stays zero instead of turning into NaN through 0/0.
    peak = np.abs(part).max(axis=1, keepdims=True)
    return np.divide(part, peak, out=np.zeros_like(part), where=peak != 0)


fre_ts_imag_train = _fft_imag_features(train_features)
fre_ts_imag_test = _fft_imag_features(test_features)


In [None]:
def _fft_real_features(frame, n_steps=375, n_bins=32, sensors=('S1', 'S2', 'S3', 'S4')):
    """Peak-normalised real FFT part per recording and sensor.

    ``frame`` is cut positionally into consecutive windows of exactly
    ``n_steps`` rows (assumes every consecutive block of ``n_steps`` rows is one
    recording — TODO confirm against the data layout). Each window is
    transformed with an orthonormal FFT; the real part of the first ``n_bins``
    bins is divided by its largest absolute value.

    Train and test must be windowed the same way. Label-based slicing such as
    ``.loc[a:b]`` includes the endpoint ``b`` and would produce 376-row windows
    that also contain the first sample of the next recording.

    Returns an array of shape ``(n_recordings, n_bins, len(sensors))``.
    """
    signals = frame[list(sensors)].values
    n_recordings = len(signals) // n_steps
    windows = signals[:n_recordings * n_steps].reshape(n_recordings, n_steps, len(sensors))

    spectrum = np.fft.fft(windows, axis=1, norm='ortho')
    part = spectrum.real[:, :n_bins, :]

    # Normalise every (recording, sensor) series by its own peak. An all-zero
    # series stays zero instead of turning into NaN through 0/0.
    peak = np.abs(part).max(axis=1, keepdims=True)
    return np.divide(part, peak, out=np.zeros_like(part), where=peak != 0)


fre_ts_real_train = _fft_real_features(train_features)
fre_ts_real_test = _fft_real_features(test_features)

In [None]:
# Scale the MFCC tensors by the largest absolute value found in the TRAINING
# tensor. Test data must be divided by the same constant: with a separate
# test-set maximum the same raw coefficient would map to different model inputs
# in train and test.
mfcc_scale = np.max(np.abs(mfcc_ts_train))
if mfcc_scale > 0:  # an all-zero tensor is left as is instead of becoming NaN
    mfcc_ts_train /= mfcc_scale
    mfcc_ts_test /= mfcc_scale

In [None]:
# Join the three feature tensors (MFCC, FFT imaginary part, FFT real part)
# along the last axis, separately for train and test.
ts_train, ts_test = (
    np.dstack(feature_parts)
    for feature_parts in (
        [mfcc_ts_train, fre_ts_imag_train, fre_ts_real_train],
        [mfcc_ts_test, fre_ts_imag_test, fre_ts_real_test],
    )
)

In [None]:
# Raw readings: every column from the third one on, reshaped to
# (recordings, 375, 4).
train_X = train_features.iloc[:, 2:].values.reshape((2800, 375, 4))
test_X = test_features.iloc[:, 2:].values.reshape((700, 375, 4))

# Targets: every column of the target table except the first.
train_y = train_Y.iloc[:, 1:].values
# Target rescaling is currently disabled:
# train_y[:,0:2]/=400
# train_y[:,2]/=100

# Hold out 30% of the engineered feature tensors for validation.
tr_X, te_X, tr_y, te_y = train_test_split(
    ts_train,
    train_y,
    random_state=42,
    test_size=0.3,
)

In [None]:
def _masked_relative_loss(column_mask):
    """Return a loss: mean of the squared relative error times ``column_mask``.

    The mask is captured by the returned function, so a compiled model keeps
    the mask it was built with no matter what other models are built later.
    """
    def masked_relative_loss(y_true, y_pred):
        relative_error = (y_pred - y_true) / (y_true + 0.000001)
        return K.mean(K.square(relative_error) * column_mask)

    return masked_relative_loss


def build_model(train_target):
    """Build and compile the bidirectional-LSTM regressor.

    Architecture: input of shape (32, 12) -> two bidirectional LSTM layers
    (64 and 32 units, ELU activation, sequences returned) -> global average
    and global max pooling, concatenated -> Dense(128) -> Dense(64) ->
    Dense(4), all dense layers using the ``mish`` activation.

    Parameters
    ----------
    train_target : int
        Selects the loss the model is compiled with:
        ``0`` -> ``my_loss_E1``;
        ``1`` -> squared relative error on output column 2 only;
        ``2`` -> squared relative error on output column 3 only;
        any other value -> ``my_loss_E2`` with the module-level mask.

    Returns
    -------
    keras.Model
        Compiled with a default Adam optimizer and an ``mae`` metric.

    Notes
    -----
    The per-target mask is bound to the loss of this particular model rather
    than written to the module-level ``weight2``. Rebinding that global made
    every model whose loss was traced afterwards use whichever mask had been
    set last.
    """
    inputs = Input(shape=(32, 12))
    x = Bidirectional(LSTM(64, return_sequences=True, activation=tf.nn.elu, kernel_initializer='he_normal'))(inputs)
    x = Bidirectional(LSTM(32, return_sequences=True, activation=tf.nn.elu, kernel_initializer='he_normal'))(x)

    # Summarise the sequence with both its mean and its maximum over time.
    x1 = GlobalAveragePooling1D()(x)
    x2 = GlobalMaxPooling1D()(x)
    x = Concatenate()([x1, x2])

    x = Dense(128, activation=mish, kernel_initializer='he_normal')(x)
    x = Dense(64, activation=mish, kernel_initializer='he_normal')(x)

    # NOTE(review): mish is bounded below (about -0.31), so this output layer
    # cannot produce strongly negative values — confirm that no target column
    # needs them.
    outputs = Dense(4, activation=mish, kernel_initializer='he_normal')(x)

    model = Model(inputs, outputs)
    optimizer = keras.optimizers.Adam()

    if train_target == 0:
        loss = my_loss_E1
    elif train_target == 1:  # only for M
        loss = _masked_relative_loss(np.array([0, 0, 1, 0]))
    elif train_target == 2:  # only for V
        loss = _masked_relative_loss(np.array([0, 0, 0, 1]))
    else:
        loss = my_loss_E2

    model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])

    return model

In [None]:
# Build one model (compiled with the train_target == 0 loss) just to inspect
# its layer structure.
nn = build_model(train_target=0)
nn.summary()

In [None]:
def train(model, X, Y):
    """Fit ``model`` on ``(X, Y)`` and plot the loss curves.

    Training runs for at most 250 epochs with shuffling, a 20% validation
    split and the module-level ``es`` and ``lrs`` callbacks. Afterwards the
    training and validation loss are plotted against the epoch.

    Parameters
    ----------
    model : compiled Keras model
    X, Y : training inputs and targets, passed straight to ``model.fit``.

    Returns
    -------
    The same ``model`` object, after fitting.
    """
    MODEL_SAVE_FOLDER_PATH = './model/'
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    # batch_size is left at the Keras default.
    history = model.fit(
        X, Y,
        epochs=250,
        shuffle=True,
        validation_split=0.2,
        verbose=2,
        callbacks=[es, lrs],
    )

    # Single axis only: no accuracy-like quantity is plotted, so no twin axis.
    fig, loss_ax = plt.subplots()
    loss_ax.plot(history.history['loss'], 'y', label='train loss')
    loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    loss_ax.legend(loc='upper left')
    plt.show()

    return model

In [None]:
MODEL_SAVE_FOLDER_PATH = './model/'
sub = pd.read_csv('./data/sample_submission.csv')
# Validation predictions, collected column by column from the three models.
te_ys = pd.DataFrame(np.zeros_like(te_y), columns=['X', 'Y', 'M', 'V'])

# Output columns each specialised model contributes.
columns_by_target = {
    0: (0, 1),  # model trained on X and Y
    1: (2,),    # model trained on M
    2: (3,),    # model trained on V
}

models = [None] * 3
for train_target in range(3):
    model = build_model(train_target)
    models[train_target] = train(model, tr_X, tr_y)

    pred_data_test = models[train_target].predict(ts_test)
    val_pred = models[train_target].predict(te_X)

    # Predictions are stored as produced; no rescaling is applied.
    # Submission column 0 is left untouched, hence the +1 offset.
    for out_col in columns_by_target[train_target]:
        sub.iloc[:, out_col + 1] = pred_data_test[:, out_col]
        te_ys.iloc[:, out_col] = val_pred[:, out_col]

# NOTE(review): kaeri_metric is not defined in the visible cells of this
# notebook; presumably it comes from `from utils import *` — confirm.
val_score = kaeri_metric(te_y, te_ys)
print(val_score)

In [None]:
# Display the submission frame, whose columns 1-4 now hold the test-set predictions.
sub