In [None]:
import os, re, gc, sys, time, random, warnings, functools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import signal
from datetime import datetime
from pykalman import KalmanFilter
from IPython.display import display

import optuna
import lightgbm as lgb
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, TimeSeriesSplit, GroupKFold, GroupShuffleSplit
from optuna.visualization import plot_optimization_history

!pip install tensorflow_addons==0.9.1
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import Callback, LearningRateScheduler, EarlyStopping
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras import losses, models, optimizers
from keras.utils.np_utils import to_categorical

pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Exports ``PYTHONHASHSEED`` to the process environment and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow with ``seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
    
def reduce_mem_usage(df, verbose=True, y=['time','open_channels']):
    """Downcast numeric columns of ``df`` in place to a narrower dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. Its columns are reassigned in place.
    verbose : bool
        When true, print the final memory footprint and the reduction.
    y : list of str
        Column names that are never converted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Notes
    -----
    Only int16/32/64 and float16/32/64 columns are considered. Float columns
    are narrowed by value range only, so the float16/float32 casts can lose
    precision.
    """
    numerics    = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_types   = (np.int8, np.int16, np.int32, np.int64)
    float_types = (np.float16, np.float32)
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col in y or col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            for int_type in int_types:
                limits = np.iinfo(int_type)
                # Inclusive bounds: a column that touches the dtype's min/max
                # still fits it exactly (e.g. [-128, 127] is a valid int8 range).
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(int_type)
                    break
        else:
            # Fall back to float64 when the range fits neither narrower type
            # (this is also the path taken when min/max are NaN).
            target = np.float64
            for float_type in float_types:
                limits = np.finfo(float_type)
                if c_min > limits.min and c_max < limits.max:
                    target = float_type
                    break
            df[col] = df[col].astype(target)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero-byte starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

def show_cols(show_list, show_num=50, col=True):
    """Display the sorted items of ``show_list`` as a table.

    Parameters
    ----------
    show_list : iterable
        Items to show; they are sorted before being laid out.
    show_num : int
        Number of items per table row. When the list has at most
        ``show_num`` items, a single row is shown.
    col : bool
        When true the table is displayed as built; otherwise it is transposed.

    Prints "No features" instead of a table when there is nothing to show.
    """
    ordered = sorted(show_list)
    if show_num < len(ordered):
        reshaped_list = []
        # The range stops before len(ordered), so no empty, all-None trailing
        # row is produced when the length is an exact multiple of show_num.
        for start in range(0, len(ordered), show_num):
            chunk = ordered[start:start + show_num]
            # Pad the last row so every row has show_num entries.
            reshaped_list.append(chunk + [None] * (show_num - len(chunk)))
    else:
        reshaped_list = [ordered]
    df_show_col = pd.DataFrame(reshaped_list)
    if 0 < df_show_col.shape[1]:
        display(df_show_col if col else df_show_col.T)
    else:
        print("No features")

In [None]:
def Kalman1D(observations, damping=1):
    """Return a Kalman-smoothed version of a 1-D series of observations.

    ``damping`` is used both as the observation covariance and as the initial
    state covariance. The transition matrix is 1, the transition covariance is
    0.1, and the initial state mean is the first observation. The value
    returned is the first output of ``KalmanFilter.smooth``.
    """
    smoother = KalmanFilter(
            transition_matrices      = 1,
            transition_covariance    = 0.1,
            observation_covariance   = damping,
            initial_state_mean       = observations[0],
            initial_state_covariance = damping)
    smoothed_means, _ = smoother.smooth(observations)
    # Drop the filter object and collect garbage before returning.
    del smoother
    gc.collect()
    return smoothed_means

In [None]:
def get_batch(train, test, batch_size=500000, batch2_size=4000):
    """Add ``batch`` and ``batch2`` group numbers to the train and test frames.

    The two frames are concatenated, rows are grouped by
    ``index // batch_size`` (``batch``) and ``index // batch2_size``
    (``batch2``), and each group is numbered in order of first appearance.
    Because the concatenated index keeps each frame's own labels, train and
    test rows with the same index label fall into the same group.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Input frames; they are not modified.
    batch_size, batch2_size : int
        Number of index positions per ``batch`` / ``batch2`` group.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Copies of train and test with the two new columns. Both carry the
        union of the input columns (a side effect of the concatenation).
    """
    # assign() builds new frames, so the caller's inputs never gain a 'set' column.
    data = pd.concat([train.assign(set='train'), test.assign(set='test')], sort=False)
    # Add batch and batch2
    for column, size in (('batch', batch_size), ('batch2', batch2_size)):
        group_numbers = data.groupby(data.index // size, sort=False).ngroup()
        # Assign by position: the concatenated index contains duplicate labels.
        data[column] = group_numbers.to_numpy().astype(int)
    train = data[data['set']=='train'].drop('set', axis=1).copy()
    test  = data[data['set']=='test'] .drop('set', axis=1).copy()
    del data
    return train, test

def normalize(train, test):
    """Standardise the ``signal`` column of both frames with training statistics.

    The mean and (population) standard deviation are computed from the train
    signal only and then applied to train and test. Both frames are modified
    in place and returned.
    """
    reference = np.asarray(train.signal.tolist())
    train_mean, train_sigma = np.mean(reference), np.std(reference)
    for frame in (train, test):
        frame['signal'] = (frame.signal - train_mean) / train_sigma
    return train, test

In [None]:
# Signal processing features
def calc_gradients(s, n_grads = 4):
    '''
    Build successive numerical derivatives of a pandas series.
    Column ``grad_k`` holds ``np.gradient`` applied k times to the values,
    so every column has the same number of samples as the input.
    '''
    columns = {}
    current = s.values
    for order in range(1, n_grads + 1):
        current = np.gradient(current)
        columns['grad_' + str(order)] = current
    return pd.DataFrame(columns)

def calc_low_pass(s, n_filts=10):
    '''
    First-order Butterworth low-pass features at ``n_filts`` log-spaced cutoffs.
    Each cutoff yields two columns: ``lowpass_lf_*`` from ``lfilter`` (left
    delayed) and ``lowpass_ff_*`` from ``filtfilt`` (no delay).
    '''
    samples = s.values
    columns = {}
    for cutoff in np.logspace(-2, -0.3, n_filts):
        tag = '%.4f' % cutoff
        num, den = signal.butter(1, Wn=cutoff, btype='low')
        # Start the one-pass filter in steady state for the first sample.
        initial_state = signal.lfilter_zi(num, den) * samples[0]
        one_pass, _ = signal.lfilter(num, den, samples, zi=initial_state)
        columns['lowpass_lf_' + tag] = one_pass
        columns['lowpass_ff_' + tag] = signal.filtfilt(num, den, samples)
    return pd.DataFrame(columns)

def calc_high_pass(s, n_filts=10):
    '''
    First-order Butterworth high-pass features at ``n_filts`` log-spaced cutoffs.
    Each cutoff yields two columns: ``highpass_lf_*`` from ``lfilter`` (left
    delayed) and ``highpass_ff_*`` from ``filtfilt`` (no delay).
    '''
    samples = s.values
    columns = {}
    for cutoff in np.logspace(-2, -0.1, n_filts):
        tag = '%.4f' % cutoff
        num, den = signal.butter(1, Wn=cutoff, btype='high')
        # Start the one-pass filter in steady state for the first sample.
        initial_state = signal.lfilter_zi(num, den) * samples[0]
        one_pass, _ = signal.lfilter(num, den, samples, zi=initial_state)
        columns['highpass_lf_' + tag] = one_pass
        columns['highpass_ff_' + tag] = signal.filtfilt(num, den, samples)
    return pd.DataFrame(columns)

def calc_ewm(s, windows=[10, 50, 100, 500, 1000]):
    '''
    Exponentially weighted mean and standard deviation of the series, one
    pair of columns (``ewm_mean_<w>``, ``ewm_std_<w>``) per span in ``windows``.
    Missing values in the result (std) are replaced by zero.
    '''
    columns = {}
    for span in windows:
        weighted = s.ewm(span=span, min_periods=1)
        columns['ewm_mean_' + str(span)] = weighted.mean()
        columns['ewm_std_' + str(span)]  = weighted.std()
    return pd.DataFrame(columns).fillna(value=0)

def add_features(s):
    '''
    Run every signal-processing feature builder on ``s`` and return the
    series together with all generated columns, joined side by side
    (gradients, low-pass, high-pass, then exponentially weighted stats).
    '''
    builders = (calc_gradients, calc_low_pass, calc_high_pass, calc_ewm)
    return pd.concat([s] + [build(s) for build in builders], axis=1)

def divide_and_add_features(s, signal_size=500000):
    '''
    Divide the signal in bags of "signal_size" and build the signal-processing
    features of ``add_features`` independently for each bag.
    Normalize the data dividing it by 15.0

    Every input row is featured: when the length is not a multiple of
    ``signal_size`` the shorter trailing bag is processed as well, so the
    result has as many rows as ``s``.
    NOTE(review): a very short trailing bag may be rejected by the filters
    used in add_features — confirm this cannot happen for the data in use.

    Returns a DataFrame with a fresh 0..n-1 index.
    '''
    # normalize
    s = s / 15.0

    n_samples = s.shape[0]
    # Ceiling division: flooring here would silently drop the trailing partial
    # bag and return fewer rows than the input.
    n_bags = -(-n_samples // signal_size)

    ls = []
    for i in range(n_bags):
        # Positional slice; each bag gets its own 0-based index.
        sig = s.iloc[i*signal_size:(i+1)*signal_size].copy().reset_index(drop=True)
        ls.append(add_features(sig))

    return pd.concat(ls, axis=0).reset_index(drop=True)

In [None]:
# Rolling and aggregate batch features
def rolling_features(df, periods, windows, g="batch", c="signal"):
    """Add shifted and rolling-window features of column ``c`` within groups ``g``.

    Works on a copy of ``df``. For every ``p`` in ``periods``:
    ``<c>_lead_t<p>`` is ``shift(p)`` and ``<c>_lag_t<p>`` is ``shift(-p)``,
    both computed per group. For every ``window`` in ``windows`` the mean,
    std, var, min and max are computed over a rolling window of the series
    shifted by 1 (no suffix) and by ``-window-1`` (``_lead`` suffix), plus a
    ``_norm`` column: the min-max scaled value of ``c`` multiplied by
    ``floor(max) - ceil(min)``.
    """
    df = df.copy()
    grouped = df.groupby(g)[c]
    stats = ("mean", "std", "var", "min", "max")

    def rolled(offset, window, stat):
        # Per group: shift by `offset`, then take the rolling statistic.
        return grouped.transform(lambda x: getattr(x.shift(offset).rolling(window), stat)())

    shifts = np.asarray(periods, dtype=np.int32)
    for p in shifts:
        df[c+"_lead_t"+str(p)] = grouped.shift(periods=p)
    for p in shifts:
        df[c+"_lag_t"+str(p)] = grouped.shift(periods=-p)

    for window in windows:
        # First pass rolls backwards (shift 1), second rolls forward (shift -window-1).
        for offset, suffix in ((1, ''), (-window-1, '_lead')):
            tail = '_t' + str(window) + suffix
            for stat in stats:
                df[c+'_'+stat+tail] = rolled(offset, window, stat)
            low, high = df[c+'_min'+tail], df[c+'_max'+tail]
            min_max = (df[c] - low) / (high - low)
            df[c+'_norm'+tail] = min_max * (np.floor(high) - np.ceil(low))
    return df

def static_batch_features(df, n, c="signal", add_detail_feats=True):
    """Aggregate column ``c`` over fixed-length batches and map the stats back to rows.

    Works on a time-sorted copy of ``df``. Rows are assigned to a batch of
    length ``n`` and to one of the slices inside that batch (slice length
    ``n / 10``), both derived from the ``time`` column. For each of the two
    groupings the mean, median and std of ``c`` are computed; with
    ``add_detail_feats`` also min, max, a scaled norm, percentiles, skew,
    kurtosis, mean absolute change and several range-style statistics.
    Every statistic is broadcast back to the rows of its group.

    The helper columns (``time``, ``open_channels``, ``signal``,
    ``signal_power`` and everything starting with ``batch``) are then dropped,
    except ``c`` itself, and for each remaining statistic a ``<stat>_m<c>``
    column holding ``stat - c`` is appended.

    Returns the resulting frame with a fresh 0..n-1 index.
    """
    df = df.copy()
    df = df.sort_values(by='time').reset_index(drop=True)
    # Index built from the time column (time * 10000 - 1).
    # NOTE(review): this index is floating point, so the floor divisions below
    # may misplace rows that sit exactly on a batch boundary — confirm.
    df.index = ((df.time * 10000) - 1).values
    df['batch_'        +str(n)] = df.index // n
    # Position of the row inside its batch.
    df['batch_index_'  +str(n)] = df.index - (df['batch_'+str(n)] * n)
    # Slice number inside the batch (slice length n / 10).
    df['batch_slices_' +str(n)] = df['batch_index_'+str(n)] // (n / 10)
    # String key "<batch>_<slice>" identifying a slice across all batches.
    df['batch_slices2_'+str(n)] = df['batch_'+str(n)].astype(str).str.zfill(3) + "_" + df['batch_slices_'+str(n)].astype(str).str.zfill(3)
    
    for g in ['batch_'+str(n), 'batch_slices2_'+str(n)]:
        # d maps new column name -> per-group statistic (Series indexed by group key).
        d = {}
        # -----------------------------------------------
        d[c+'_mean'  +g]   = df.groupby(g)[c].mean()
        d[c+'_median'+g]   = df.groupby(g)[c].median()
        d[c+'_std'   +g]   = df.groupby(g)[c].std()
        if add_detail_feats:
            d[c+'_min'   +g]   = df.groupby(g)[c].min()
            d[c+'_max'   +g]   = df.groupby(g)[c].max()
            # Min-max scaled group mean, rescaled by floor(max) - ceil(min).
            min_max            = (d[c+'_mean'+g]-d[c+'_min'+g]) / (d[c+'_max'+g]-d[c+'_min'+g])
            d[c+'_norm'    +g] = min_max * (np.floor(d[c+'_max'+g])-np.ceil(d[c+'_min'+g]))
            d[c+'_p5'    +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 5))
            d[c+'_p10'   +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 10))
            d[c+'_p25'   +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 25))
            d[c+'_p75'   +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 75))
            d[c+'_p90'   +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 90))
            d[c+'_p95'   +g]   = df.groupby(g)[c].apply(lambda x: np.percentile(x, 95))
            d[c+'_skew'  +g]   = df.groupby(g)[c].apply(lambda x: pd.Series(x).skew())
            d[c+'_kurtosis'+g] = df.groupby(g)[c].apply(lambda x: pd.Series(x).kurtosis())
            d[c+'_mean_abs_chg'+g] = df.groupby(g)[c].apply(lambda x: np.mean(np.abs(np.diff(x))))
            d[c+'_abs_max' +g] = df.groupby(g)[c].apply(lambda x: np.max(np.abs(x)))
            d[c+'_abs_min' +g] = df.groupby(g)[c].apply(lambda x: np.min(np.abs(x)))
            d[c+'_range'   +g] = d[c+'_max'+g] - d[c+'_min'+g]
            d[c+'_maxtomin'+g] = d[c+'_max'+g] / d[c+'_min'+g]
            d[c+'_abs_avg' +g] = (d[c+'_abs_min'+g] + d[c+'_abs_max'+g]) / 2
        # -----------------------------------------------
        # Broadcast each per-group statistic back onto the rows of that group.
        for v in d:
            df[v] = df[g].map(d[v].to_dict())

    # Drop the raw and helper columns, keeping only c and the statistics.
    # NOTE(review): every listed column other than c is presumably expected to
    # exist in df, since drop is called without errors='ignore' — confirm.
    drop_cols  = ["time","open_channels","signal","signal_power"] + [col for col in df.columns if col.startswith("batch")]
    drop_cols  = [col for col in drop_cols if col != c]
    df.drop(drop_cols, axis=1, inplace=True)
    # Add the difference between every statistic and the raw value of c.
    for diffc in df.columns:
        if c == diffc: continue
        df[diffc+'_m'+c] = df[diffc] - df[c]
    return df.reset_index(drop=True)

In [None]:
def load_result_lgbm_regression(train_test, df=None, add_static_feats=False, n=25000, save=True):
    """Load stored LGBM regression predictions for the "train" or "test" split.

    The CSV is read as a single ``lgbm_reg_result`` column (its first row is
    skipped). With ``save`` the predictions are also written to the working
    directory. When ``add_static_feats`` is set and ``df`` is given, the
    predictions are joined to ``df`` column-wise and replaced by the output
    of ``static_batch_features`` for batch length ``n``.
    """
    source = "../input/results-for-ionswitching/lgbm_regression_%s_preds.csv" % train_test
    preds  = pd.read_csv(source, names=["lgbm_reg_result"], skiprows=1)
    if save:
        preds.to_csv("./lgbm_regression_%s_preds.csv" % train_test, index=False)
    if not add_static_feats or df is None:
        return preds
    joined = pd.concat([df, preds], axis=1)
    return static_batch_features(joined, n, "lgbm_reg_result", False)

# Load data and set parameters

In [None]:
# Main parameters
# These flags select which branches of the cells below are executed.
CALCULATE_KALMAN = True   # If false, calculated kalman data will be loaded.
OUTPUT_FEATURES  = True   # If true, feature importance file will be generated.
ENSEMBLE_MODELS  = False  # If true, LGBM result and WaveNet result will be ensembled.
TRAIN_WAVENET    = False  # If true, stored LGBM regression results are added as features and NaNs are imputed per batch.
TRAIN_LGBM       = True   # If true, the LGBM feature settings and the extra feature builders are used.
LGBM_REGRESSION  = True   # If true, LGBM will train on regression task. If not so, multiclass.

In [None]:
# Load data
# NOTE(review): the directory name suggests drift-corrected inputs — confirm provenance.
train = pd.read_csv('../input/data-without-drift/train_clean.csv')
test  = pd.read_csv('../input/data-without-drift/test_clean.csv')

# Downcast numeric columns (all except 'time' and 'open_channels') to save memory.
train = reduce_mem_usage(train)
test  = reduce_mem_usage(test)

In [None]:
%%time
if CALCULATE_KALMAN:
    # Kalman Filter
    # The value below is passed to Kalman1D as its `damping` argument.
    observation_covariance = .0015
    train.signal = Kalman1D(train.signal.values, observation_covariance)
    test.signal  = Kalman1D(test.signal.values,  observation_covariance)
else:
    # Reuse the smoothed data stored by an earlier run instead of recomputing it.
    train = pd.read_csv("../input/results-for-ionswitching/train_kalman.csv")
    test  = pd.read_csv("../input/results-for-ionswitching/test_kalman.csv")   
    train = reduce_mem_usage(train)
    test  = reduce_mem_usage(test)
    
# Save created data for next kernel.
# Time is rounded to 4 decimals before the frames are written out.
train.time = train.time.apply(lambda x: round(x,4))
test. time = test .time.apply(lambda x: round(x,4))
train.to_csv('./train_kalman.csv', index=False)
test .to_csv('./test_kalman.csv',  index=False)
gc.collect()

In [None]:
%%time
# Replace some outliers.
# Index labels 478587-478588 and 478609-478610 (inclusive) are overwritten with -2.
train.loc[478587:478588, "signal"] = -2
train.loc[478609:478610, "signal"] = -2

# Normalize signal and add batches.
# train, test = normalize(train, test)
# get_batch adds the 'batch' and 'batch2' group columns to both frames.
train, test = get_batch(train, test)

# Add signal power
train["signal_power"] = train.signal ** 2
test ["signal_power"] = test .signal ** 2

In [None]:
# Except outlier
# Keep rows 3500000-3642921 and 3822754-3999999 of the 3500000-3999999 range;
# the rows in between (3642922-3822753) are discarded.
non_outlier_batch7 = pd.concat([train[3500000:3642922], train[3822754:4000000]], sort=False)
train = pd.concat([train[:3500000], non_outlier_batch7, train[4000000:]])
# Sort first and renumber afterwards, so the index is always 0..n-1 in time
# order even if the sort has to move rows.
train = train.sort_values(by="time").reset_index(drop=True)

print(train.shape)
del non_outlier_batch7
gc.collect()

# Feature engineering

In [None]:
def get_no_importance_features(thr_no_imp):
    """Return the feature names whose ``importance_mean`` is <= ``thr_no_imp``.

    The importances are read from a CSV produced by an earlier run; when that
    file is missing an empty list is returned. The CSV is also copied to the
    working directory, and a short summary plus the selected names are shown.
    """
    importance_csv = "../input/results-for-ionswitching/features_of_lgbm_regression.csv"
    if not os.path.isfile(importance_csv):
        for message in ("'features_of_lgbm_regression.csv' is not exist.",
                        "Since this is the first kernel, features file has not been generated yet."):
            print(message)
        return []
    importance = pd.read_csv(importance_csv)
    importance.to_csv("./features_of_lgbm_regression.csv", index=False)
    n_features  = importance.features.nunique()
    below_thr   = importance["importance_mean"] <= thr_no_imp
    unimportant = list(importance.loc[below_thr, "features"].unique())
    kept_pct    = round(((n_features-len(unimportant))/n_features*100),2)
    print("Ratio of features will be used is %s%%." % kept_pct)
    print("The number of deleted features is %s(all %s)." % (len(unimportant), n_features))
    show_cols(unimportant)
    return unimportant

def delete_features(df, no_feats):
    """Drop from ``df`` every column listed in ``no_feats``.

    Names in ``no_feats`` that are not columns of ``df`` are ignored. When
    ``no_feats`` is empty, ``df`` itself is returned unchanged; otherwise the
    share of dropped columns is printed and a new frame is returned.
    """
    if len(no_feats)==0:
        return df
    doomed = [name for name in df.columns if name in no_feats]
    n_doomed, n_total = len(doomed), len(df.columns)
    print("Ratio of deleted featrues is %s(%s/%s)"
          % (n_doomed/n_total, n_doomed, n_total))
    return df.drop(doomed, axis=1)

In [None]:
# Parameters
# cols_rolling     : columns passed to rolling_features
# cols_static      : batch length (as string) -> columns passed to static_batch_features
# group            : grouping column used by rolling_features
# periods, windows : shift periods and rolling window sizes for rolling_features
# add_static_feats : [flag, batch length] forwarded to load_result_lgbm_regression
# thr_no_imp       : features with importance_mean <= this value are dropped
if TRAIN_LGBM:
    cols_rolling = ["signal","signal_power"]
    cols_static  = {"25000" : ["signal","signal_power"],
                    "50000" : []}
    group   = "batch"
    periods = range(1,5)
    windows = [10,50,100,1000]
    add_static_feats = [True, 25000]
    thr_no_imp       = -1
else:
    cols_rolling = ["signal"]
    cols_static  = {"4000" : ["signal"]}
    group   = "batch2"
    periods = range(1,2)
    windows = [100]
    add_static_feats = [False, 4000]
    thr_no_imp       = -1

In [None]:
# Features to drop: those with importance_mean <= thr_no_imp (empty list when no importance file exists yet).
no_feats = get_no_importance_features(thr_no_imp)

In [None]:
%%time
# Create features for training
dfs_train = [train]

print("Processing for 'rolling_features'")
dcols = ["signal","signal_power","time","open_channels","batch","batch2"]
for col in cols_rolling:
    df = reduce_mem_usage(rolling_features(train, periods, windows, g=group, c=col).drop(dcols, axis=1))
    dfs_train.append(delete_features(df, no_feats))
del df
gc.collect()

if TRAIN_WAVENET or (not LGBM_REGRESSION):
    print("=================================")
    print("Processing for 'load_result_lgbm_regression'")
    add, n = add_static_feats
    dfs_train.append(reduce_mem_usage(load_result_lgbm_regression("train", train, add, n)))
        
if TRAIN_LGBM:
    print("=================================")
    print("Processing for 'divide_and_add_features'")
    df = reduce_mem_usage(divide_and_add_features(train.signal).drop("signal", axis=1))
    dfs_train.append(delete_features(df, no_feats))
    del df
    gc.collect()

    print("=================================")
    print("Processing for 'static_batch_features'")
    for n, cols in cols_static.items():
        for col in cols:
            df = reduce_mem_usage(static_batch_features(train, int(n), col).drop(col, axis=1))
            dfs_train.append(delete_features(df, no_feats))
        
del df, train
gc.collect()

In [None]:
# Join features for training.
# Column-wise concat: the feature frames are aligned on their index.
train = pd.concat(dfs_train, axis=1, sort=False)
print(train.shape)
del dfs_train
gc.collect()

In [None]:
%%time
# Create features for testing.
dfs_test = [test]

print("Processing for 'rolling_features'")
dcols = ["signal","signal_power","time","open_channels","batch","batch2"]
for col in cols_rolling:
    df = reduce_mem_usage(rolling_features(test, periods, windows, g=group, c=col).drop(dcols, axis=1))
    dfs_test.append(delete_features(df, no_feats))
del df
gc.collect()

if TRAIN_WAVENET or (not LGBM_REGRESSION):
    print("=================================")
    print("Processing for 'load_result_lgbm_regression'")
    add, n = add_static_feats
    dfs_test.append(reduce_mem_usage(load_result_lgbm_regression("test", test, add, n)))

if TRAIN_LGBM:
    print("=================================")
    print("Processing for 'divide_and_add_features'")
    df = reduce_mem_usage(divide_and_add_features(test.signal).drop("signal", axis=1))
    dfs_test.append(delete_features(df, no_feats))
    del df
    gc.collect()
    
    print("=================================")
    print("Processing for 'static_batch_features'")
    for n, cols in cols_static.items():
        for col in cols:
            df = reduce_mem_usage(static_batch_features(test, int(n), col).drop(col, axis=1))
            dfs_test.append(delete_features(df, no_feats))
    
del df, test
gc.collect()

In [None]:
# Join features for testing.
# Column-wise concat: the feature frames are aligned on their index.
test = pd.concat(dfs_test, axis=1, sort=False)
print(test.shape)
del dfs_test
gc.collect()

In [None]:
%%time
# Only when training WaveNet: replace missing feature values by the mean of
# the non-missing values of the same feature within the same batch.
if TRAIN_WAVENET:
    print("Processing train data...")
    for feat in train.columns:
        # Skip features without any missing value.
        if train[train[feat].isnull()].shape[0]==0: continue
        for batch in train.batch.unique():
            _mean = np.mean(train[(train.batch==batch)&(train[feat].notnull())][feat].tolist())
            train.loc[(train.batch==batch)&(train[feat].isnull()), feat] = _mean
            
    print("Processing test data...")
    for feat in test.columns:
        # Skip features without any missing value.
        if test[test[feat].isnull()].shape[0]==0: continue
        for batch in test.batch.unique():
            _mean = np.mean(test[(test.batch==batch)&(test[feat].notnull())][feat].tolist())
            test.loc[(test.batch==batch)&(test[feat].isnull()), feat] = _mean

In [None]:
# Sanity check: final shapes and first rows of both feature matrices.
print(train.shape)
print(test.shape)
display(train.head())
display(test.head())

In [None]:
class BaseModel(object):
    """
    Base Model Class: cross-validated training that runs on construction.

    Subclasses implement train_model, get_params and convert_dataset.
    The constructor runs fit() and stores its results as attributes
    (y_pred, y_valid, score, model, oof, y_val, fi_df).

    train_df         : train pandas dataframe
    test_df          : test pandas dataframe
    target           : target column name (str)
    features         : list of feature names (the group column is removed from this list in place)
    valid_df         : optional hold-out pandas dataframe; an empty frame is treated as None
    categoricals     : list of categorical feature names
    n_splits         : K in KFold (default is 3)
    cv_method        : options are .. KFold, StratifiedKFold, TimeSeriesSplit, GroupKFold, or GroupShuffleSplit
    group            : group feature name when GroupKFold or GroupShuffleSplit are used
    task             : options are .. regression, multiclass, nn_multiclass, or binary
    params           : dict of parameter, set that if you already define
    parameter_tuning : bool, only for LGB
    seed             : seed (int)
    verbose          : bool
    """

    def __init__(self, train_df, test_df, target, features, 
                 valid_df=None, categoricals=[], 
                 n_splits=3, cv_method="KFold", group=None,
                 task="regression", params=None, parameter_tuning=False,
                 seed=42, verbose=True):
        self.train_df     = train_df
        if valid_df is not None and valid_df.shape[0]==0:
            self.valid_df = None            
        else:
            self.valid_df = valid_df
        self.test_df      = test_df
        self.target       = target
        self.features     = features
        self.n_splits     = n_splits
        self.categoricals = categoricals
        self.cv_method    = cv_method
        self.group        = group
        self.task         = task
        self.parameter_tuning = parameter_tuning
        self.seed    = seed
        self.cv      = self.get_cv()
        self.verbose = verbose
        if params is None:
            self.params = self.get_params()
        else:
            self.params = params
        self.y_pred, self.y_valid, self.score, self.model, self.oof, self.y_val, self.fi_df = self.fit()

    def train_model(self, train_set, val_set):
        """Train one fold; must return (model, feature_importance)."""
        raise NotImplementedError

    def get_params(self):
        """Return the parameter dict used when none is passed to the constructor."""
        raise NotImplementedError

    def convert_dataset(self, x_train, y_train, x_val, y_val):
        """Convert fold data into the (train_set, val_set) pair expected by train_model."""
        raise NotImplementedError

    def calc_metric(self, y_true, y_pred): # this may need to be changed based on the metric of interest
        """Return the macro F1 score of ``y_pred`` against ``y_true`` for the current task."""
        if self.task in ("multiclass","nn_multiclass"):
            # Class scores (one column per class) are reduced to labels; labels pass through.
            preds = np.argmax(y_pred, axis=1) if y_true.shape != y_pred.shape else y_pred
            return f1_score(y_true, preds, average='macro')                
        if self.task == "binary":
            # Threshold at 0.5 so continuous scores become class labels;
            # predictions that already are 0/1 labels are left unchanged.
            preds = (np.asarray(y_pred) > 0.5).astype(int)
            return f1_score(y_true, preds, average='macro')
        if self.task == "regression":
            # Regression outputs are clipped to [0, 10] and rounded to the nearest class.
            preds = np.round(np.clip(y_pred, 0, 10)).astype(int)
            return f1_score(y_true, preds, average='macro')

    def get_cv(self):
        """Return the (train_idx, val_idx) split iterator for ``cv_method``.

        Raises ValueError for an unsupported ``cv_method``.
        """
        if self.cv_method == "KFold":
            cv = KFold(n_splits=self.n_splits, shuffle=True, random_state=self.seed)
            return cv.split(self.train_df)
        if self.cv_method == "StratifiedKFold":
            cv = StratifiedKFold(n_splits=self.n_splits, shuffle=True, random_state=self.seed)
            return cv.split(self.train_df, self.train_df[self.target])
        if self.cv_method == "TimeSeriesSplit":
            cv = TimeSeriesSplit(max_train_size=None, n_splits=self.n_splits)
            return cv.split(self.train_df)
        if self.cv_method == "GroupKFold":
            if self.group in self.features:
                self.features.remove(self.group)
            cv = GroupKFold(n_splits=self.n_splits)
            return cv.split(self.train_df[self.features], self.train_df[self.target], self.train_df[self.group])
        if self.cv_method == "GroupShuffleSplit":
            if self.group in self.features:
                self.features.remove(self.group)
            cv = GroupShuffleSplit(n_splits=self.n_splits, random_state=self.seed)
            return cv.split(self.train_df[self.features], self.train_df[self.target], self.train_df[self.group])
        # Fail here with a clear message instead of later when iterating over None.
        raise ValueError("Unsupported cv_method: %r" % (self.cv_method,))

    def fit(self):
        """Train one model per fold and collect predictions, scores and importances.

        Returns
        -------
        tuple
            (test predictions averaged over folds, hold-out predictions
            averaged over folds or None, out-of-fold score, model of the last
            fold, out-of-fold predictions, out-of-fold targets,
            feature-importance dataframe).
        """
        # Initialize
        y_vals = np.zeros((self.train_df.shape[0], ))
        if self.task in ("multiclass","nn_multiclass"):
            oof_pred = np.zeros((self.train_df.shape[0], self.train_df[self.target].nunique()))
            y_pred   = np.zeros((self.test_df.shape[0],  self.train_df[self.target].nunique()))
            if self.valid_df is not None:
                y_valid = np.zeros((self.valid_df.shape[0], self.train_df[self.target].nunique()))
            else:
                y_valid = None
        else:
            oof_pred = np.zeros((self.train_df.shape[0], ))
            y_pred   = np.zeros((self.test_df.shape[0], ))
            if self.valid_df is not None:
                y_valid = np.zeros((self.valid_df.shape[0], ))
            else:
                y_valid = None
            
        # The group column is only used for splitting, never as a model input.
        if self.group is not None:
            if self.group in self.features:
                self.features.remove(self.group)
            if self.group in self.categoricals:
                self.categoricals.remove(self.group)
                
        fi = np.zeros((self.n_splits, len(self.features)))
        if y_valid is not None:
            x_valid = self.valid_df[self.features].copy()
            del self.valid_df
            gc.collect()
        x_test = self.test_df[self.features]

        # The splitters yield row positions. Translate them to index labels so
        # the .loc lookups below also work when train_df does not carry a
        # default 0..n-1 index.
        row_labels = self.train_df.index

        # Fitting with out of fold
        for fold, (train_idx, val_idx) in enumerate(self.cv):
            # Prepare train and test dataset
            train_rows, val_rows = row_labels[train_idx], row_labels[val_idx]
            x_train, x_val     = self.train_df.loc[train_rows, self.features], self.train_df.loc[val_rows, self.features]
            y_train, y_val     = self.train_df.loc[train_rows, self.target],   self.train_df.loc[val_rows, self.target]
            train_set, val_set = self.convert_dataset(x_train, y_train, x_val, y_val)
            del x_train, y_train
            gc.collect()
            
            # Fit model
            model, importance = self.train_model(train_set, val_set)
            fi[fold, :]       = importance
            y_vals[val_idx]   = y_val
            
            # Get some scores
            if   self.task == "binary":
                oof_pred[val_idx] = model.predict(x_val).reshape(oof_pred[val_idx].shape)
                y_pred += model.predict(x_test).reshape(y_pred.shape) / self.n_splits
                
            elif self.task == "regression":
                oof_pred[val_idx] = model.predict(x_val).reshape(oof_pred[val_idx].shape)
                if y_valid is not None:
                    y_valid += model.predict(x_valid).reshape(y_valid.shape) / self.n_splits
                y_pred += model.predict(x_test).reshape(y_pred.shape) / self.n_splits
                
            elif self.task == "multiclass":
                oof_pred[val_idx] = model.predict(x_val)
                if y_valid is not None:
                    y_valid += model.predict(x_valid).reshape(y_valid.shape) / self.n_splits
                y_pred += model.predict(x_test).reshape(y_pred.shape) / self.n_splits
                
            elif self.task == "nn_multiclass":
                # Predictions are flattened to one row per sample.
                preds   = model.predict(val_set[0])
                oof_pred[val_idx] = preds.reshape(-1, preds.shape[-1])
                if y_valid is not None:
                    preds    = model.predict(self.convert_dataset(x_valid))
                    y_valid += preds.reshape(-1, preds.shape[-1]) / self.n_splits
                preds   = model.predict(self.convert_dataset(x_test))
                y_pred += preds.reshape(-1, preds.shape[-1]) / self.n_splits
                
            print('Partial score of fold {} is: {}'.format(fold, self.calc_metric(y_val, oof_pred[val_idx])))
        
        # Create feature importance data frame (one block of rows per fold, concatenated once)
        fi_df = pd.concat(
            [pd.DataFrame({"features": self.features, "importance": fi[n, :], "fold": n})
             for n in np.arange(self.n_splits)],
            ignore_index=True)
        gfi   = fi_df[["features", "importance"]].groupby(["features"]).mean().reset_index()
        fi_df = fi_df.merge(gfi, on="features", how="left", suffixes=('', '_mean'))
        
        # Calculate oof score
        loss_score = self.calc_metric(y_vals, oof_pred)
        print('Our oof loss score is: ', loss_score)
        
        return y_pred, y_valid, loss_score, model, oof_pred, y_vals, fi_df

    def plot_feature_importance(self, rank_range=[1, 50]):
        """Bar-plot the per-fold importances of the features ranked within ``rank_range``.

        Features are ranked by mean importance over folds (rank 1 is the most
        important). Returns the sorted importance dataframe.
        """
        fig, ax   = plt.subplots(1, 1, figsize=(10, 20))
        sorted_df = self.fi_df.sort_values(by="importance_mean", ascending=False).reset_index()
        # Each feature occupies n_splits consecutive rows (one per fold).
        sns.barplot(data=sorted_df.iloc[self.n_splits*(rank_range[0]-1) : self.n_splits*rank_range[1]],
                    x="importance", y="features", orient='h')
        ax.set_xlabel("feature importance")
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        return sorted_df

In [None]:
class LgbModel(BaseModel):
    """
    LGB wrapper
    """
    def train_model(self, train_set, val_set):
        """Train a LightGBM booster on one fold; returns (model, gain importances)."""
        verbosity = 100 if self.verbose else 0
        model     = lgb.train(self.params, train_set, num_boost_round=4000, valid_sets=[train_set, val_set], verbose_eval=verbosity)
        fi        = model.feature_importance(importance_type="gain")
        return model, fi

    def convert_dataset(self, x_train, y_train, x_val=None, y_val=None):
        """Wrap fold data in lgb.Dataset objects.

        Returns (train_set, val_set), or only train_set when no validation
        features are given.
        """
        train_set   = lgb.Dataset(x_train, y_train, categorical_feature=self.categoricals)
        if x_val is not None:
            val_set = lgb.Dataset(x_val,   y_val,   categorical_feature=self.categoricals)
            return train_set, val_set
        return train_set

    def get_params(self):
        """Return the LightGBM parameters for the current task.

        With ``parameter_tuning`` an Optuna study searches the tree and
        regularisation parameters on a 70/30 split, maximising the score of
        ``calc_metric``; the best trial's values are merged into the result
        and the learning rate is set to 0.001.
        """
        # Fast fit parameters
        params = {
          'boosting_type'    : "gbdt",
          'objective'        : self.task,
          'num_leaves'       : 127,
          'max_depth'        : -1,
          'min_data_in_leaf' : 50,
          'learning_rate'    : 0.005,
          'early_stopping_rounds' : 50,
          'bagging_seed'     : 11,
          'random_state'     : 42,
          'verbosity'        : -1
         }

        # List is here: https://lightgbm.readthedocs.io/en/latest/Parameters.html
        if self.task == "regression":
            params["metric"] = "rmse"
        if self.task == "binary":
            params["metric"] = "binary_logloss"
        if self.task == "multiclass":
            params["metric"]    = "multi_logloss"
            params["num_class"] = len(self.train_df[self.target].unique())
        
        # Bayesian Optimization by Optuna
        if self.parameter_tuning:
            # Define objective function
            def objective(trial):
                # Split train and test data
                train_x, test_x, train_y, test_y = train_test_split(self.train_df[self.features], 
                                                                    self.train_df[self.target],
                                                                    test_size=0.3, random_state=self.seed)
                
                dtrain = lgb.Dataset(train_x, train_y, categorical_feature=self.categoricals)
                dtest  = lgb.Dataset(test_x,  test_y,  categorical_feature=self.categoricals)

                # Parameters to be explored
                hyperparams = {'num_leaves'        : trial.suggest_int('num_leaves', 24, 1024),
                               'max_depth'         : trial.suggest_int('max_depth', 4, 16),
                               'min_child_weight'  : trial.suggest_int('min_child_weight', 1, 20),
                               'feature_fraction'  : trial.suggest_uniform('feature_fraction', 0.4, 1.0),
                               'bagging_fraction'  : trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
                               'bagging_freq'      : trial.suggest_int('bagging_freq', 1, 7),
                               'min_child_samples' : trial.suggest_int('min_child_samples', 5, 100),
                               'lambda_l1'         : trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
                               'lambda_l2'         : trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
                               'early_stopping_rounds' : 100}
                # LGBM
                # NOTE(review): this updates the outer dict, so
                # 'early_stopping_rounds': 100 stays in the returned params —
                # confirm that is intended.
                params.update(hyperparams)
                model = lgb.train(params, dtrain, valid_sets=dtest, verbose_eval=500)
                pred  = model.predict(test_x)
                if self.task == "regression":
                    pred = np.round(np.clip(pred, 0, 10)).astype(int)
                return self.calc_metric(test_y, pred)

            # Run optimization
            # The objective returns a macro F1 score (higher is better), so the
            # study has to maximize it; minimizing would select the worst trial.
            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=50)
            print('Number of finished trials: {}'.format(len(study.trials)))
            print('Best trial:')
            trial = study.best_trial
            print('  Value: {}'.format(trial.value))
            print('  Params: ')
            for key, value in trial.params.items():
                print('    {}: {}'.format(key, value))

            params.update(trial.params)
            params["learning_rate"] = 0.001
            # Plot history
            plot_optimization_history(study)
        return params

In [None]:
class WaveNet(BaseModel):
    """
    WaveNet-style sequence classifier wrapped in the ``BaseModel`` interface.

    The network stacks gated, dilated 1-D convolution blocks and emits a softmax
    over the target classes for every time step of an input sequence.
    """
    def get_params(self):
        """Build, display and return the default hyperparameters.

        Returns:
            dict: hyperparameters read by the other methods of this class.
        """
        params = {
            # Sequence length used by convert_dataset when reshaping rows
            "batch_size"    : 4000,
            # Number of distinct target values in the training frame
            "num_classes"   : len(self.train_df[self.target].unique()),
            # Base learning rate; lr_schedule divides it as epochs advance
            "learning_rate" : 0.0015,
            # Maximum number of epochs passed to model.fit
            "nn_epochs"     : 180,
            # Batch size (in sequences) passed to model.fit
            "nn_batch_size" : 16,
            # EarlyStopping patience on val_loss
            "patience"      : 25
        }
        display(params)
        return params

    def get_model(self, shape_):
        """Build and compile the Keras model.

        Args:
            shape_: Input shape without the batch dimension; ``train_model``
                passes ``(None, n_features)``.

        Returns:
            A compiled Keras model whose last layer has
            ``self.params["num_classes"]`` softmax units per time step.
        """
        def cbr(x, out_layer, kernel, stride, dilation):
            # Conv1D -> BatchNormalization -> ReLU
            x = Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x)
            x = BatchNormalization()(x)
            x = Activation("relu")(x)
            return x

        def wave_block(x, filters, kernel_size, n):
            # n gated convolutions with dilation rates 1, 2, 4, ..., 2**(n-1).
            # Each gate output goes through a 1x1 convolution and is summed
            # onto the residual path that is finally returned.
            dilation_rates = [2**i for i in range(n)]
            x     = Conv1D(filters=filters, kernel_size=1, padding='same')(x)
            res_x = x
            for dilation_rate in dilation_rates:
                tanh_out = Conv1D(filters=filters, kernel_size=kernel_size, padding='same', activation='tanh',    dilation_rate=dilation_rate)(x)
                sigm_out = Conv1D(filters=filters, kernel_size=kernel_size, padding='same', activation='sigmoid', dilation_rate=dilation_rate)(x)
                x     = Multiply()([tanh_out, sigm_out])
                x     = Conv1D(filters=filters, kernel_size=1, padding='same')(x)
                res_x = Add()([res_x, x])
            return res_x

        inp = Input(shape=shape_)
        x   = cbr(inp, 64, 7, 1, 1)
        x   = BatchNormalization()(x)
        x   = wave_block(x, 16, 3, 12)
        x   = BatchNormalization()(x)
        x   = wave_block(x, 32, 3, 8)
        x   = BatchNormalization()(x)
        x   = wave_block(x, 64, 3, 4)
        x   = BatchNormalization()(x)
        x   = wave_block(x, 128, 3, 1)
        x   = cbr(x, 32, 7, 1, 1)
        x   = BatchNormalization()(x)
        x   = wave_block(x, 64, 3, 1)
        x   = cbr(x, 32, 7, 1, 1)
        x   = BatchNormalization()(x)
        x   = Dropout(0.2)(x)
        # The output width follows params["num_classes"] so it always matches
        # the one-hot targets built in convert_dataset (it used to be a
        # hard-coded 11, which breaks as soon as the class count differs).
        out = Dense(self.params["num_classes"], activation='softmax', name='out')(x)

        model = models.Model(inputs=inp, outputs=out)
        # `learning_rate` is the current keyword; `lr` is a deprecated alias.
        opt   = Adam(learning_rate=self.params["learning_rate"])
        opt   = tfa.optimizers.SWA(opt)
        model.compile(loss=losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])
        return model

    def lr_schedule(self, epoch):
        """Return the learning rate to use for ``epoch`` (step-wise decay).

        The base rate is used for the first 30 epochs, then divided by
        3, 5, 7, 9, 11 and 13 for each following block of 10 epochs, and by 100
        from epoch 90 onwards.
        """
        base_lr = self.params["learning_rate"]
        if epoch < 30:
            return base_lr
        # (exclusive upper epoch bound, divisor applied to the base rate)
        for upper_epoch, divisor in ((40, 3), (50, 5), (60, 7), (70, 9), (80, 11), (90, 13)):
            if epoch < upper_epoch:
                return base_lr / divisor
        return base_lr / 100

    def train_model(self, train_set, val_set):
        """Train a fresh model on one train/validation split.

        Args:
            train_set: ``[x, y]`` as produced by ``convert_dataset``.
            val_set: ``[x, y]`` used as Keras validation data.

        Returns:
            tuple: ``(model, None)``; the second element is always ``None``.
        """
        # Prepare WaveNet model: drop any previous graph state and install a
        # session limited to one intra-op and one inter-op thread
        # (presumably for reproducibility -- confirm).
        K.clear_session()
        config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
        sess   = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)
        tf.compat.v1.keras.backend.set_session(sess)
        model  = self.get_model((None, len(self.features)))
        # Define callbacks
        cb_es  = EarlyStopping(monitor='val_loss', patience=self.params["patience"], verbose=1, mode='auto')
        cb_lr  = LearningRateScheduler(self.lr_schedule)
        # Start training
        model.fit(train_set[0], train_set[1],
                  epochs          = self.params["nn_epochs"],
                  batch_size      = self.params["nn_batch_size"],
                  validation_data = (val_set[0], val_set[1]),
                  callbacks       = [cb_lr, cb_es],
                  verbose         = 2)
        return model, None

    def convert_dataset(self, x_train, y_train=None, x_val=None, y_val=None):
        """Reshape flat rows into fixed-length sequences for the network.

        Features become arrays of shape ``(-1, batch_size, n_features)`` and
        targets are one-hot encoded and reshaped to
        ``(-1, batch_size, num_classes)``. The number of rows must therefore be
        a multiple of ``params["batch_size"]``, otherwise ``reshape`` raises.

        Args:
            x_train: Training features (array-like).
            y_train: Training targets; when ``None`` only features are returned.
            x_val: Optional validation features.
            y_val: Validation targets, used only when ``x_val`` is given.

        Returns:
            ``[x]`` when ``y_train`` is ``None``; ``[x, y]`` when no validation
            data is given; otherwise ``([x, y], [x_val, y_val])``.
        """
        seq_len   = self.params["batch_size"]
        n_classes = self.params["num_classes"]
        n_feats   = len(self.features)

        x_ary     = np.array(x_train)
        train_set = [x_ary.reshape(-1, seq_len, n_feats)]
        if y_train is None:
            return train_set
        yt_ohe = to_categorical(y_train, num_classes=n_classes)
        train_set.append(yt_ohe.reshape(-1, seq_len, n_classes))

        if x_val is not None:
            x_ary   = np.array(x_val)
            yv_ohe  = to_categorical(y_val, num_classes=n_classes)
            val_set = [x_ary.reshape (-1, seq_len, n_feats),
                       yv_ohe.reshape(-1, seq_len, n_classes)]
            return train_set, val_set
        return train_set

In [None]:
def feature_extraction_corr(df, thrs=[0.99]):
    """Find columns that are highly correlated with an earlier column.

    A column is reported for a threshold when the absolute correlation between
    it and at least one column positioned before it in ``df.corr()`` is strictly
    greater than that threshold. NaN correlations never match.

    Args:
        df: DataFrame whose pairwise column correlations are evaluated.
        thrs: Iterable of thresholds; each one yields its own result list.

    Returns:
        dict: maps ``str(threshold)`` to the sorted list of flagged column names.
    """
    corr_matrix = df.corr()
    abs_corr    = np.abs(corr_matrix.values)
    # Only pairs (i, j) with j < i count, i.e. the strict lower triangle.
    # One vectorised comparison per threshold replaces the per-cell .iloc loop.
    earlier_col = np.tril(np.ones(abs_corr.shape, dtype=bool), k=-1)

    dict_features = {}
    for thr in thrs:
        flagged = ((abs_corr > thr) & earlier_col).any(axis=1)
        dict_features[str(thr)] = sorted(set(corr_matrix.columns[flagged]))
    for key, item in dict_features.items():
        print(key, "The number of features is %s" % len(item))
    return dict_features

def plot_feature_importance(df, n_splits=1, rank_range=[1, 50]):
    """Draw a horizontal bar plot of feature importances for a range of ranks.

    Rows are ordered by ``importance_mean`` (descending). The plotted slice
    covers ranks ``rank_range[0]`` to ``rank_range[1]``, taking ``n_splits``
    rows per rank.

    Returns:
        The sorted DataFrame (with the previous index kept as a column).
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 20))
    ranked  = df.sort_values(by="importance_mean", ascending=False).reset_index()

    # Translate the 1-based rank window into row positions
    row_from = n_splits * (rank_range[0] - 1)
    row_to   = n_splits * rank_range[1]
    sns.barplot(data=ranked.iloc[row_from:row_to],
                x="importance", y="features", orient='h')

    ax.set_xlabel("feature importance")
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    return ranked

def plot_results(df, col, thin_out=10):
    """Scatter-plot ``signal`` against ``time``, one series per value of ``col``.

    Only every ``thin_out``-th point of each group is drawn. The legend lists
    the sorted unique values of ``col``.
    """
    group_values = sorted(df[col].unique())
    for value in group_values:
        subset = df[df[col] == value]
        plt.scatter(subset.time[::thin_out], subset.signal.values[::thin_out])
    plt.legend(group_values, bbox_to_anchor=(1.05, 1), loc='best', borderaxespad=0)
    
def concat_no_imp_feats(df, no_feats, thr_no_imp):
    """Append previously saved importances of unused features to ``df``.

    Rows of ``df`` are marked ``use=True``. Rows for the features listed in
    ``no_feats`` are read from the saved LGBM regression importance file,
    de-duplicated and marked ``use=False``. The combined frame is returned
    sorted by ``importance_mean`` (descending) with a fresh index.
    ``thr_no_imp`` is accepted but not used. ``df`` itself is not modified.
    """
    used_feats = df.copy()
    used_feats["use"] = True

    saved_feats  = pd.read_csv("../input/results-for-ionswitching/features_of_lgbm_regression.csv")
    unused_feats = (saved_feats.query("features in @no_feats")[["features","importance_mean"]]
                               .drop_duplicates()
                               .assign(use=False))

    combined = pd.concat([used_feats, unused_feats], sort=False)
    combined = combined.sort_values(by="importance_mean", ascending=False)
    return combined.reset_index(drop=True)

# Create the WaveNet and LGBM models, then ensemble them.

In [None]:
%%time
# Calculate correlation of features, and the features that have high correlation will be deleted
drop_cols     = ["time","open_channels","batch","batch2"]
df_for_corr   = train.sample(int(train.shape[0]/10), random_state=42)
df_for_corr   = df_for_corr.reset_index(drop=True).drop(drop_cols, axis=1).copy()
dict_features = feature_extraction_corr(df_for_corr, thrs=[0.95, 0.96, 0.97, 0.98, 0.99])

feat_corr_95 = dict_features["0.95"]; feat_corr_96 = dict_features["0.96"]
feat_corr_97 = dict_features["0.97"]; feat_corr_98 = dict_features["0.98"]
feat_corr_99 = dict_features["0.99"]
del df_for_corr, dict_features
gc.collect()

In [None]:
# Show the features flagged as highly correlated at the 0.95 and 0.99 thresholds
show_cols(feat_corr_95, 200)
show_cols(feat_corr_99, 200)

In [None]:
# Training configuration
target    = "open_channels"
fold      = "GroupKFold"
group     = "batch2"
task_wn   = "nn_multiclass"
task_lgbm = "regression" if LGBM_REGRESSION else "multiclass"

# Number of CV splits (k) and training subsample ratio, per model / task
if TRAIN_WAVENET:
    k, thr_tr_subs_ratio = 6, 1      # WaveNet parameters
elif task_lgbm == "regression":
    k, thr_tr_subs_ratio = 6, 5      # LGBM regression parameters
else:
    k, thr_tr_subs_ratio = 4, 10     # LGBM multiclass parameters

# Number of `batch2` values sampled from every batch for validation
thr_va_subs_num = 2

# LGBM regression has to be run three times:
#  1st run: decide the hyperparameters                      -> set False
#  2nd run: decide the important features with those params -> set False
#  3rd run: create results for all data, which are then used by
#           LGBM multiclass and WaveNet                      -> set True
del_valid_data = False  # If True, the validation data is replaced by an empty frame

# LGBM hyperparameters; left as None when they are going to be tuned
tune   = False
params = None
if not tune:
    # NOTE(review): LightGBM documents `min_child_samples` as an alias of
    # `min_data_in_leaf`; both are set here with different values (50 / 97).
    # Confirm which one is intended.
    params = dict(
        boosting_type         = 'gbdt',
        objective             = task_lgbm,
        num_leaves            = 889,
        max_depth             = 10,
        min_data_in_leaf      = 50,
        min_child_weight      = 7,
        feature_fraction      = 0.441848950364408,
        bagging_fraction      = 0.7907047350361494,
        bagging_freq          = 4,
        min_child_samples     = 97,
        lambda_l1             = 0.034185189341285234,
        lambda_l2             = 9.744523836901568,
        learning_rate         = 0.001,
        early_stopping_rounds = 100,
        bagging_seed          = 11,
        random_state          = 42,
        verbosity             = -1,
    )

    # The evaluation metric depends on the task
    if task_lgbm == "regression":
        params.update(metric='rmse')
    else:
        params.update(metric="multi_logloss", num_class=11)

In [None]:
# Define the features that will be used for training
not_use_cols = ["time","open_channels","batch","batch2"]
# Highly correlated features (threshold 0.99) that are present in `train`
drop_cols    = [col for col in train.columns if col in feat_corr_99]
features     = [col for col in train.columns
                if col not in not_use_cols and col not in drop_cols]

# Remove the correlated columns from both frames in place
train.drop(columns=drop_cols, inplace=True)
test.drop(columns=drop_cols, inplace=True)

print("The number of training features is %s," % len(features))
show_cols(features)

In [None]:
# Prepare training data and validation data
train_data = train.query("batch2 not in (910,955)").reset_index(drop=True)
valid_data = pd.DataFrame()

# Use some train data for validation
if 0 < thr_va_subs_num:
    # Collect the held-out pieces and concatenate them once: DataFrame.append
    # in a loop copies the growing frame every time and no longer exists in
    # pandas 2.x.
    valid_parts = []
    for batch in train_data.batch.unique():
        data        = train_data.query("batch==@batch")
        # NOTE(review): this samples rows of `batch2`, so the same value could
        # be drawn more than once. Left unchanged on purpose so the split stays
        # identical to earlier runs whose saved predictions are loaded later.
        valid_batch = list(data.batch2.sample(thr_va_subs_num, random_state=42))
        print("Batch is %s, and batch2 are got from range of %s to %s. Results of sampling are %s."
              % (batch, data.batch2.min(), data.batch2.max(), valid_batch))
        for vb in valid_batch:
            valid_parts.append(data.query("batch2==@vb"))

    if valid_parts:
        valid_data = pd.concat(valid_parts)
    valid_data = valid_data.reset_index(drop=True)
    # Everything that went into validation is removed from the training data
    train_data = train_data.query("batch2 not in @valid_data.batch2.unique()").reset_index(drop=True)
    del data, valid_parts

# If 'del_valid_data' is true, validation data will be overridden.
if del_valid_data:
    print("validation data were overridden by empty dataframe.")
    valid_data = pd.DataFrame()

# If 'thr_tr_subs_ratio' is over 1, training data will be subsampled.
if 1 < thr_tr_subs_ratio and TRAIN_LGBM:
    extract_num = int(train_data.shape[0]/thr_tr_subs_ratio)
    train_data  = train_data.sample(extract_num, random_state=42).reset_index(drop=True)

train_data.shape, valid_data.shape

In [None]:
# `train` has already been split into train_data / valid_data; drop it to free memory
del train
gc.collect()

In [None]:
%%time
if TRAIN_WAVENET:
    seed_everything()
    # Start training
    wn = WaveNet(train_data, test, target, features,
                 valid_df=valid_data, task=task_wn,
                 cv_method=fold, n_splits=k, group=group,
                 verbose=False)

In [None]:
%%time
if TRAIN_LGBM:
    # Start training
    lgbm = LgbModel(train_data, test, target, features,
                    valid_df=valid_data, task=task_lgbm,
                    cv_method=fold, n_splits=k, group=group, 
                    params=params, parameter_tuning=tune,
                    verbose=False)

# Save results for submission and for use in later kernels

In [None]:
%%time
# Save predict results.
train_preds_wn   = None; test_preds_wn   = None
train_preds_lgbm = None; test_preds_lgbm = None

if TRAIN_WAVENET:
    print("Saving WaveNet results...")
    train_preds_wn = pd.DataFrame(wn.y_valid)
    test_preds_wn  = pd.DataFrame(wn.y_pred)
    train_preds_wn.to_csv("./wavenet_train_preds.csv", index=False)
    test_preds_wn .to_csv("./wavenet_test_preds.csv",  index=False)

if TRAIN_LGBM:
    print("Saving LGBM results...")
    if lgbm.y_valid is None:
        train_preds_lgbm = pd.DataFrame(lgbm.oof)        
    else:
        train_preds_lgbm = pd.DataFrame(lgbm.y_valid)
    test_preds_lgbm      = pd.DataFrame(lgbm.y_pred)
    train_preds_lgbm.to_csv("./lgbm_%s_train_preds.csv" % task_lgbm, index=False)        
    test_preds_lgbm .to_csv("./lgbm_%s_test_preds.csv"  % task_lgbm, index=False)
    
if ENSEMBLE_MODELS:
    # If model training has not been run, the previous kernel results are loaded.
    if train_preds_wn is None:
        train_preds_wn   = pd.read_csv("../input/results-for-ionswitching/wavenet_train_preds.csv")
        test_preds_wn    = pd.read_csv("../input/results-for-ionswitching/wavenet_test_preds.csv")
        # Save for next kernels.
        train_preds_wn.to_csv("./wavenet_train_preds.csv", index=False)        
        test_preds_wn .to_csv("./wavenet_test_preds.csv",  index=False)
    if train_preds_lgbm is None:
        train_preds_lgbm = pd.read_csv("../input/results-for-ionswitching/lgbm_multiclass_train_preds.csv")
        test_preds_lgbm  = pd.read_csv("../input/results-for-ionswitching/lgbm_multiclass_test_preds.csv")
        # Save for next kernels.
        train_preds_lgbm.to_csv("./lgbm_multiclass_train_preds.csv", index=False)        
        test_preds_lgbm .to_csv("./lgbm_multiclass_test_preds.csv",  index=False)

In [None]:
# Show scores
# Both values stay None when no model was trained in this run (ensemble-only
# mode); previously that case ended in a NameError.
train_score = None
valid_preds = None

if TRAIN_WAVENET:
    train_score = wn.score
    del wn
if TRAIN_LGBM:
    train_score = lgbm.score
    fi_df       = lgbm.fi_df.copy()
    display(lgbm.params)
    del lgbm

if 0 < valid_data.shape[0]:
    gt = valid_data.open_channels
    if TRAIN_WAVENET:
        valid_preds = np.argmax(np.array(train_preds_wn), axis=1)
    if TRAIN_LGBM:
        if task_lgbm == "multiclass":
            valid_preds = np.argmax(np.array(train_preds_lgbm), axis=1)
        else:
            # Regression output is a value, not class probabilities: round and
            # clip it the same way the test predictions are handled for the
            # submission. argmax over it would always give class 0.
            # Assumes a single prediction column -- confirm.
            valid_preds = np.round(np.clip(np.array(train_preds_lgbm), 0, 10)).astype(int).ravel()

# The validation score needs both validation data and predictions from this run
if valid_preds is not None:
    valid_score = f1_score(gt, valid_preds, average='macro')
else:
    valid_score = None

print("Training  : %s" % train_score)
print("validation: %s" % valid_score)
gc.collect()

In [None]:
%%time
# Search the best parameters for ensembling.
if ENSEMBLE_MODELS:
    gt = valid_data.open_channels
    results = []
    for a in np.arange(0, 1.001, 0.01):
        preds = np.argmax(np.array(a*train_preds_wn) + np.array((1-a)*train_preds_lgbm), axis=1)
        results.append([a, 1-a, f1_score(gt, preds, average='macro')])
    df_ensembled = pd.DataFrame(results, columns=["WaveNet_ratio","LGBM_ratio","score"])
    wn_a, lgbm_a ,best_score = df_ensembled.query("score==@df_ensembled.score.max()").values[0]
    
    print("Best parameters are %s(WaveNet) and %s(LGBM). Best score is %s." % (wn_a, lgbm_a, best_score))
    print("==========================")
    df_ensembled.plot(x="WaveNet_ratio",y="score")

In [None]:
# Write the submission file of the ensembled predictions
if ENSEMBLE_MODELS:
    print("Use %s(WaveNet) and %s(LGBM)." % (wn_a, lgbm_a))
    # Weighted sum of both models' test predictions, then the best class per row
    test.open_channels = (
        np.array(wn_a*test_preds_wn) + np.array(lgbm_a*test_preds_lgbm)
    ).argmax(axis=1).astype(int)
    test[["time","open_channels"]].to_csv('./submission_ensembled.csv', index=False, float_format='%0.4f')

In [None]:
# Scatter-plot the training signal over time, one series per open_channels value
# (only drawn for ensemble runs)
if ENSEMBLE_MODELS:
    plt.figure(figsize=(25,5))
    plot_results(train_data, "open_channels")

In [None]:
# Scatter-plot the test signal over time, grouped by the ensembled
# open_channels predictions assigned to `test` in the submission cell
if ENSEMBLE_MODELS:
    plt.figure(figsize=(25,5))
    plot_results(test, "open_channels")

In [None]:
# Write one submission file per model that was trained in this run
if TRAIN_WAVENET:
    # Best class per row of the WaveNet test predictions
    test.open_channels = np.array(test_preds_wn).argmax(axis=1).astype(int)
    test[["time","open_channels"]].to_csv('./submission_wavenet.csv', index=False, float_format='%0.4f')

if TRAIN_LGBM:
    if task_lgbm != "multiclass":
        # Regression output: clip to [0, 10] and round to the nearest integer
        test.open_channels = np.clip(np.array(test_preds_lgbm), 0, 10).round().astype(int)
    else:
        # Multiclass output: best class per row
        test.open_channels = np.array(test_preds_lgbm).argmax(axis=1).astype(int)
    test[["time","open_channels"]].to_csv('./submission_lgbm_%s.csv' % task_lgbm, index=False, float_format='%0.4f')

In [None]:
# Scatter-plot the test signal over time, grouped by the single-model
# open_channels predictions; when both models were trained, `test` holds the
# LGBM predictions because they are assigned last
if TRAIN_WAVENET or TRAIN_LGBM:
    plt.figure(figsize=(25,5))
    plot_results(test, "open_channels")

In [None]:
# Plot feature importance and list the most important features
if TRAIN_LGBM:
    # This call draws the importance plot; the frame it returns is replaced
    # by the next assignment.
    df_feat_sorted = plot_feature_importance(fi_df, n_splits=k)
    # NOTE(review): `no_feats` and `thr_no_imp` are not defined in this part of
    # the notebook -- presumably set in an earlier cell; verify.
    df_feat_sorted = concat_no_imp_feats(fi_df, no_feats, thr_no_imp)
    if OUTPUT_FEATURES:
        df_feat_sorted.to_csv("./features_of_lgbm_%s.csv" % task_lgbm, index=False)
    # Show the first 20 distinct (feature, mean importance, use) rows
    display(df_feat_sorted[["features","importance_mean","use"]].drop_duplicates().head(20))    