In [0]:
import sys
import logging
import time
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedShuffleSplit

from xgboost import XGBClassifier

import tensorflow
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.regularizers import l1_l2

Using TensorFlow backend.


In [0]:
def elapsed_time(start_time, end_time):
    elapsed_sec = end_time - start_time
    h = int(elapsed_sec / (60 * 60))
    m = int((elapsed_sec % (60 * 60)) / 60)
    s = int(elapsed_sec % 60)
    return "{}:{:>02}:{:>02}".format(h, m, s)


def standarize_feature(train_df, test_df, cols):
    scaler = StandardScaler()
    for col in cols:
        train_df[col] = scaler.fit_transform(train_df[col].reshape(-1,1).astype(np.float32))
        test_df[col] = scaler.transform(test_df[col].reshape(-1,1).astype(np.float32))
    return None


def extend_bounds(bins):
    bins[0] = bins[0] - 1
    bins[-1] = bins[-1] + 1

In [0]:
def load_data(train_data_path='UCL course/Applied ML/otto/input_data/train.csv', test_data_path = 'UCL course/Applied ML/otto/input_data/test.csv'):
    train_df = pd.read_csv(train_data_path, sep=',', index_col=0, header=0)
    test_df = pd.read_csv(test_data_path, sep=',', index_col=0, header=0)
    
    train_df['target'] = train_df['target'].str[-1].astype(int) - 1
        
    return train_df, test_df

In [0]:
def process_data(train_df, test_df, ylabel='target', standarization=False, transform=None):
    numerical_features = train_df.columns

    if standarization:
        standarized_features = numerical_features
        standarize_feature(train_df, test_df, standarized_features)
    
    X = train_df.drop(ylabel, axis=1).values
    y = train_df[ylabel].values
    X_submission = test_df.values
    
    if transform == 'log':
        X = np.log(X+1)
        X_submission = np.log(X_submission+1)
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        X_submission = scaler.transform(X_submission)
    elif transform == 'sqrt':
        X = np.sqrt(X + 3.0 / 8)
        X_submission = np.sqrt(X_submission + 3.0 / 8)
    elif transform == 'pca':
        pca = PCA(n_components=3).fit(X)
        X = pca.transform(X)
        X_submission = pca.transform(X_submission)
    elif transform == 'tsne':
        tsne = TSNE(n_components=3).fit(X)
        X = tsne.transform(X)
        X_submission = tsne.transform(X_submission)
    elif transform == 'pca+':
        pca = PCA(n_components=3).fit(X)
        X = np.hstack((X, pca.transform(X)))
        X_submission = np.hstack((X, pca.transform(X)))
    elif transform == 'tsne+':
        tsne = TSNE(n_components=3).fit(X)
        X = np.hstack((X, tsne.transform(X)))
        X_submission = np.hstack((X_submission, tsne.transform(X_submission)))        
    return X, y, X_submission

In [0]:
def evaluate(y, y_pred):
    logloss = log_loss(y, y_pred)
    return logloss

In [0]:
def models_Split_train(models, X, y, X_submission, n_classes, n_folds=5):
  sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
  for train_index, test_index in sss.split(X, y):
      X_train = X[train_index]
      X_val = X[test_index]

      y_train = y[train_index]
      y_val = y[test_index]
  for i, model in enumerate(models):
    print ("Model %d:" % i, model)   
    avg_logloss = 0
    model.fit(X_train, y_train)
    y_test_pred = model.predict_proba(X_val)
    logloss = evaluate(y_val, y_test_pred)
    print ("  logloss: %f" % logloss)
  return logloss

In [0]:
def models_CV_train(models, X, y, X_submission, n_classes, n_folds=5):
    summary = {}

    skf = list(StratifiedKFold(n_folds,shuffle=True, random_state=0).split(X, y))
    
    stack_train = np.zeros((X.shape[0], n_classes, len(models)))
    stack_test = np.zeros((X_submission.shape[0], n_classes, len(models)))
    
    for i, model in enumerate(models):
        print ("Model %d:" % i, model)
        
        avg_logloss = 0
        
        stack_test_model_i = np.zeros((X_submission.shape[0], n_classes, len(skf)))
        for j, (train_idx, test_idx) in enumerate(skf):
            print ("  Fold %d" % j)
            X_train = X[train_idx]
            y_train = y[train_idx]
            X_test = X[test_idx]
            y_test = y[test_idx]

            model.fit(X_train, y_train)
            
            y_test_pred = model.predict_proba(X_test)          
            stack_train[test_idx, :, i] = y_test_pred
            
            logloss = evaluate(y_test, y_test_pred)
            avg_logloss += logloss
            # print ("  logloss: %f" % logloss)
            
            y_submission_pred = model.predict_proba(X_submission)           
            stack_test_model_i[:, :, j] = y_submission_pred
        
        avg_logloss = avg_logloss / n_folds
        print ("model average logloss: %f" % avg_logloss)
        summary[i] = avg_logloss
        
        stack_test[:, :, i] = stack_test_model_i.mean(axis=2)

    return np.swapaxes(stack_train, 1, 2).reshape((X.shape[0], -1)), np.swapaxes(stack_test, 1, 2).reshape((X_submission.shape[0], -1)), summary

In [0]:
def create_2_layer_keras_model(input_dim, output_dim,first,second,denn):
    model = Sequential()
    model.add(Dropout(denn, input_shape=(input_dim,)))
    model.add(Dense(first, use_bias=True,bias_initializer='glorot_normal', kernel_initializer='glorot_normal', activation='relu',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    model.add(Dropout(denn))
    model.add(Dense(second,use_bias=True,bias_initializer='glorot_normal',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    model.add(Dropout(denn))
    model.add(Dense(output_dim,use_bias=True,bias_initializer='glorot_normal',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_initializer='glorot_normal', activation='softmax', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'categorical_crossentropy'])
    return model

def create_3_layer_keras_model(input_dim, output_dim,first,second,third,denn):
    model = Sequential()
    model.add(Dropout(0.05, input_shape=(input_dim,)))
    model.add(Dense(first, use_bias=True,bias_initializer='glorot_normal', kernel_initializer='glorot_normal', activation='relu',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    model.add(Dropout(denn))    
    model.add(Dense(second, use_bias=True,bias_initializer='glorot_normal',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    model.add(Dropout(denn))
    model.add(Dense(third, use_bias=True,bias_initializer='glorot_normal',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    model.add(Dropout(denn))
    model.add(Dense(output_dim,use_bias=True,bias_initializer='glorot_normal',bias_regularizer=l1_l2(l1=1e-5, l2=1e-5), kernel_initializer='glorot_normal', activation='softmax', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-5)))
    
    model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy', 'categorical_crossentropy'])
    return model

In [0]:
# start_time = time.time()

logging.basicConfig(level=logging.DEBUG,
          format='[%(asctime)s]: %(message)s ',
          datefmt='%Y-%m-%d %H:%M:%S',
          stream=sys.stdout,
          filemode="w"
          )

In [0]:
logging.info('Load data')
train_df, test_df = load_data()
X, y, X_submission = process_data(train_df, test_df, transform='log')

# training phase 1
logging.info('Training phase 1')
models = []
# models += [MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes = (30, 10), random_state = 1, verbose = True)]
# models += [KerasClassifier(build_fn=create_2_layer_keras_model, input_dim=X.shape[1], output_dim=9, nb_epoch=200, batch_size=256, verbose=0)]
# models += [KerasClassifier(build_fn=create_3_layer_keras_model, input_dim=X.shape[1], output_dim=9, nb_epoch=200, batch_size=256, verbose=0)]

logging.info('List of models to train and ensemble:')

[2020-03-01 12:21:34]: Load data 
[2020-03-01 12:21:35]: NumExpr defaulting to 8 threads. 
[2020-03-01 12:21:35]: Training phase 1 
[2020-03-01 12:21:35]: List of models to train and ensemble: 


In [0]:
# train_models_pred, test_models_pred, summary = models_Split_train(models, X, y, X_submission, n_classes=9, n_folds=5)
first_layer=[4096,2048,1024,526]
second_layer=[2048,1024,526,256]
third_layer=[1024,526,256,128]
dens_list=[0.05]
num=len(first_layer)
loss=np.zeros((num,num,num))
for i in range(num):
  for j in range(num):
    for k in range(num):
      models = [KerasClassifier(build_fn=create_3_layer_keras_model, input_dim=X.shape[1],output_dim=9,first=first_layer[i],second=second_layer[j],third=third_layer[k],denn=dens_list[0], nb_epoch=800, batch_size=256, verbose=0)]
      logloss=models_Split_train(models, X, y, X_submission, n_classes=9, n_folds=5)
      loss[i,j,k]=logloss
print(loss)

Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D188>
  logloss: 0.562605
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D088>
  logloss: 0.571882
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D188>
  logloss: 0.553885
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D088>
  logloss: 0.567148
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D188>
  logloss: 0.567377
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D088>
  logloss: 0.564832
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD0D188>
  logloss: 0.560274
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x000002594AA710C8>
  logloss: 0.557548
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000259549BFA08>
  logloss: 0.553663
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 

In [0]:
np.where(loss==np.min(loss))

NameError: ignored

In [0]:
# training phase 1
logging.info('Training phase 1')
models = []
train_models_pred_list=[]
test_models_pred_list=[]
for i in range(120):
  epoch=500+i*10
  models = [KerasClassifier(build_fn=create_3_layer_keras_model, input_dim=X.shape[1],output_dim=9,first=1024,second=512,third=256,denn=0.05, nb_epoch=epoch, batch_size=256, verbose=0)]
  train_models_pred, test_models_pred, summary = models_CV_train(models, X, y, X_submission, n_classes=9, n_folds=5)
  train_models_pred_list.append(train_models_pred)
  test_models_pred_list.append(test_models_pred)

[2020-02-29 16:07:14]: Training phase 1 
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD54B08>
  Fold 0
  logloss: 0.557680
  Fold 1
  logloss: 0.568144
  Fold 2
  logloss: 0.569654
  Fold 3
  logloss: 0.580111
  Fold 4
  logloss: 0.564145
model average logloss: 0.567947
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000259A4936F48>
  Fold 0
  logloss: 0.566772
  Fold 1
  logloss: 0.558566
  Fold 2
  logloss: 0.584645
  Fold 3
  logloss: 0.568973
  Fold 4
  logloss: 0.563456
model average logloss: 0.568482
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000258ECD54B08>
  Fold 0
  logloss: 0.583106
  Fold 1
  logloss: 0.557151
  Fold 2
  logloss: 0.568221
  Fold 3
  logloss: 0.557760
  Fold 4
  logloss: 0.568083
model average logloss: 0.566864
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x00000259A4936F48>
  Fold 0
  logloss: 0.569861
  Fold 1
  logloss: 0.562197
  Fold 2
  logloss: 0.560008
  Fol

In [0]:
# training phase 1
logging.info('Training phase 2')
models = []
train_models_pred_list_2=[]
test_models_pred_list_2=[]
for i in range(120):
  epoch=300+i*10
  models = [KerasClassifier(build_fn=create_2_layer_keras_model, input_dim=X.shape[1],output_dim=9,first=4096,second=526,denn=0.05, nb_epoch=epoch, batch_size=256, verbose=0)]
  train_models_pred, test_models_pred, summary = models_CV_train(models, X, y, X_submission, n_classes=9, n_folds=5)
  train_models_pred_list_2.append(train_models_pred)
  test_models_pred_list_2.append(test_models_pred)

[2020-03-01 12:22:11]: Training phase 2 
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022BB9F306C8>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.565820
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022BB9EA6588>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.561925
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022BB9F306C8>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.571598
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022BB9EA6588>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.570243
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022BB9F306C8>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.560145
Model 0: <keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022B82B08B48>
  Fold 0
  Fold 1
  Fold 2
  Fold 3
  Fold 4
model average logloss: 0.

In [0]:
train_models_pred_final=np.zeros((61878,9))
for i in range(len(train_models_pred_list)):
  train_models_pred_final=train_models_pred_final+train_models_pred_list[i]
  logloss_1 = evaluate(y,train_models_pred_final/(i+1))
print(logloss_1)

NameError: ignored

In [0]:
train_models_pred_final_2=np.zeros((61878,9))
for i in train_models_pred_list_2:
  train_models_pred_final_2=train_models_pred_final_2+i
logloss_2 = evaluate(y,train_models_pred_final_2/len(train_models_pred_list_2))
print(logloss_2)

0.5338584489978203


In [0]:
train_models_pred_final=np.zeros((61878,9))
test_models_pred_final=np.zeros((144368,9))
for i in range(len(train_models_pred_list)):
  train_models_pred_final = train_models_pred_final + train_models_pred_list[i]
  test_models_pred_final = test_models_pred_final + test_models_pred_list[i]
  logloss_2 = evaluate(y,train_models_pred_final/len(train_models_pred_list))
print(logloss_2)

0.5377954185558323


In [0]:
train_models_pred_final=train_models_pred_final/len(train_models_pred_list)
test_models_pred_final=test_models_pred_final/len(test_models_pred_list)
columns = ['Class_' + str(i + 1) for i in range(9)]
submission_df = pd.DataFrame(test_models_pred_final, columns=columns)
submission_df.index = submission_df.index + 1
# submission_df.to_csv('NN_2_12run.csv', sep=',',index_label='id')
np.savetxt("model20_test.csv", test_models_pred_final, delimiter=",")
np.savetxt("model20_train.csv", train_models_pred_final, delimiter=",")