In [None]:
# Install CatBoost into the notebook runtime. The version is not pinned; the saved
# output of this cell shows that catboost-0.24.2 was the release that got installed.
!pip install catboost

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/52/39/128fff65072c8327371e3c594f3c826d29c85b21cb6485980353b168e0e4/catboost-0.24.2-cp36-none-manylinux1_x86_64.whl (66.1MB)
[K     |████████████████████████████████| 66.2MB 56kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.24.2


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Directory holding the per-split index CSVs. Defined once so that relocating
# the data means editing a single line instead of three.
FEATURES_DIR = '/content/drive/My Drive/Data/DashSpeed/FeaturesDenseOriginal'

# One index frame per split. Later cells read an 'id' column from all three
# frames and a 'speed' column from the train and validation frames.
train = pd.read_csv(f'{FEATURES_DIR}/Train_features.csv')
validation = pd.read_csv(f'{FEATURES_DIR}/Validation_features.csv')
test = pd.read_csv(f'{FEATURES_DIR}/Test_features.csv')

In [None]:
train_path = '/content/drive/My Drive/Data/DashSpeed/FeaturesDenseOriginal/Train/'
val_path = '/content/drive/My Drive/Data/DashSpeed/FeaturesDenseOriginal/Validation/'
test_path = '/content/drive/My Drive/Data/DashSpeed/FeaturesDenseOriginal/Test/'


def _prefix_once(ids, prefix):
    """Return `ids` with `prefix` prepended to every entry that does not already start with it.

    The 'id' columns are overwritten in place below, so without this guard a
    second execution of the cell would prepend the directory twice and every
    later `np.load` on those paths would fail.

    Parameters
    ----------
    ids : pandas.Series of str
        File names (or already-prefixed paths).
    prefix : str
        Directory prefix, including the trailing slash.

    Returns
    -------
    pandas.Series of str
        Same index as `ids`; each value starts with `prefix` exactly once.
    """
    already_prefixed = ids.str.startswith(prefix)
    return ids.where(already_prefixed, prefix + ids)


# Turn the bare file names in each index frame into full paths (safe to re-run).
train['id'] = _prefix_once(train['id'], train_path)
validation['id'] = _prefix_once(validation['id'], val_path)
test['id'] = _prefix_once(test['id'], test_path)

In [None]:
def _load_feature_arrays(paths):
    """Load each path with `np.load` and reshape the result to (8, 1920).

    Returns a plain Python list (not a stacked array) because later cells
    concatenate these lists with `+`.
    """
    arrays = []
    for npy_path in paths:
        arrays.append(np.load(npy_path).reshape((8, 1920)))
    return arrays


print('Train')
X_train = _load_feature_arrays(train['id'])
y_train = list(train['speed'])

print('Validation')
X_val = _load_feature_arrays(validation['id'])
y_val = list(validation['speed'])

# The test frame only contributes features; no 'speed' column is read from it.
print('Test')
X_test = _load_feature_arrays(test['id'])

Train
Validation
Test


In [None]:
# Per-sample feature shape, read from the first training array (each array was
# reshaped to (8, 1920) when loaded). Used as the `shape` of the Keras Input layer.
SHAPE = X_train[0].shape

In [None]:
import keras.backend as K
from sklearn.metrics import mean_squared_error

def rmse(y_actual, y_predicted):
    """Root-mean-squared error built from Keras backend ops.

    Squares the element-wise difference of the two tensors, averages over all
    elements (no axis is given to `K.mean`), and returns the square root as a
    scalar tensor. Used in this notebook both as the training loss and as the
    reported metric.
    """
    squared_error = K.square(y_actual - y_predicted)
    return K.sqrt(K.mean(squared_error))

import keras
from keras.models import Model, Sequential
from keras.layers import *
from keras.activations import *
from keras.optimizers import *
from keras.callbacks import *

# One sample has shape SHAPE; the saved summary shows it as (None, 8, 1920).
inputs = Input(shape=SHAPE)

# Recurrent encoder: the first bidirectional GRU keeps the full sequence so the
# second one can consume it and collapse it to a single vector.
hidden = Bidirectional(GRU(units=32, activation='tanh', return_sequences=True))(inputs)
hidden = Bidirectional(GRU(units=64, activation='tanh'))(hidden)
hidden = Dropout(rate=0.2)(hidden)

# Dense head.
hidden = Dense(units=48, activation='relu')(hidden)
hidden = Dropout(rate=0.3)(hidden)

# `suboutput` is the 32-unit activation taken *before* the last dropout; it is
# exposed through `submodel` so it can be used as a feature vector later on.
suboutput = Dense(units=32, activation='relu')(hidden)
output = Dropout(rate=0.2)(suboutput)

# Single-unit regression output.
output = Dense(units=1, activation='relu')(output)

# `model` and `submodel` are built on the same layer graph, so fitting `model`
# also updates the weights that `submodel` reads.
model = Model(inputs, output)
submodel = Model(inputs, suboutput)

model.compile(
    optimizer=Adam(),
    loss=rmse,
    metrics=[rmse],
)

model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 8, 1920)]         0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 8, 64)             375168    
_________________________________________________________________
bidirectional_3 (Bidirection (None, 128)               49920     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 48)                6192      
_________________________________________________________________
dropout_4 (Dropout)          (None, 48)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)               

In [None]:
from sklearn.model_selection import KFold

# Integer sample ids for the submission file, recovered from the test index
# frame that is already in memory. `basename` strips any directory prefix that
# was put on `test['id']`, so Test_features.csv does not have to be read again.
# NOTE: `id` shadows the Python builtin; the name is kept because later cells
# in this notebook read it.
id = test['id'].apply(lambda x: int(os.path.basename(x).split('.')[0])).values

kfold = KFold(5, shuffle=True, random_state=123)

predictions = []  # one DataFrame(ID, speed) of test-set predictions per fold
accuracies = []   # per-fold RMSE on the separate validation split (lower is better)

# Convert the Python lists to arrays once instead of on every fold.
X_all, y_all = np.array(X_train), np.array(y_train)
X_holdout, y_holdout = np.array(X_val), np.array(y_val)
X_test_arr = np.array(X_test)

# Snapshot of the weights the model has when this cell starts. Every fold is
# reset to this state; otherwise the model keeps training across folds and each
# fold's validation rows have already been used for training in earlier folds.
initial_weights = model.get_weights()

for fold, (train_idx, test_idx) in enumerate(kfold.split(X_all, y_all), start=1):

    # Fresh start for this fold: restore the weights and recompile so the Adam
    # optimizer state from the previous fold is discarded as well.
    model.set_weights(initial_weights)
    model.compile(loss=rmse, metrics=[rmse], optimizer=Adam())

    train_x, train_y = X_all[train_idx], y_all[train_idx]
    val_x, val_y = X_all[test_idx], y_all[test_idx]

    print(f'FOLD: {fold}')
    model.fit(train_x, train_y,
              validation_data=(val_x, val_y),
              epochs=30, batch_size=16,
              # RMSE must be minimised; state it explicitly instead of relying
              # on name-based inference, and keep the best epoch's weights
              # rather than the ones from `patience` epochs later.
              callbacks=[EarlyStopping(patience=5, monitor='val_rmse', mode='min',
                                       restore_best_weights=True)]
             )
    print()
    # Score on the separate validation split; evaluate() returns [loss, rmse].
    evaluation = model.evaluate(X_holdout, y_holdout)
    print(f'Evaluation Score: {evaluation}')
    accuracies.append(evaluation[-1])

    # Flatten the (n_samples, 1) output without hard-coding the test-set size.
    prediction = model.predict(X_test_arr)
    prediction = pd.DataFrame(data={'ID': id, 'speed': prediction.reshape(-1)})
    predictions.append(prediction)

    print('==='*60)

print()
print(f'MIN: {min(accuracies)}')
print(f'MAX: {max(accuracies)}')
print(f'MEAN: {np.mean(accuracies)}')

FOLD: 1
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30

Evaluation Score: [1.663473129272461, 1.6604417562484741]
FOLD: 2
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30

Evaluation Score: [1.6322540044784546, 1.6287803649902344]
FOLD: 3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

Evaluation Score: [1.5863484144210815, 1.5865713357925415]
FOLD: 4
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

Evaluation Score: [1.5483381748199463, 1.5486613512039185]
FOLD: 5
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30

Evaluation Score: [1.5621737241744995, 1.562031626701355]

MIN: 1.5486613512039185
MAX: 1.6604417562484741
MEAN: 1.5972972869873048


In [None]:
# Average the per-fold test predictions. Divide by the number of folds that
# actually produced a prediction instead of a hard-coded 5, so the result stays
# a true mean if the fold count changes.
ens_speed = sum(df['speed'] for df in predictions) / len(predictions)
submission = pd.DataFrame(data={'ID': id, 'speed': ens_speed})

In [None]:
# Write the fold-averaged neural-network predictions (columns: ID, speed) to a
# CSV in the working directory, without the DataFrame index column.
submission.to_csv('submission_neural.csv', index=False)

#### *Refitting the model on the training data (with the validation data monitoring early stopping) to feed the hidden-layer features into CatBoost.*

In [None]:
# Continue training `model` on the full training set. The validation split is
# only used to monitor `val_rmse` for early stopping (patience of 8 epochs).
refit_stopper = EarlyStopping(patience=8, monitor='val_rmse')
refit_validation = (np.array(X_val), np.array(y_val))

model.fit(
    np.array(X_train),
    np.array(y_train),
    validation_data=refit_validation,
    epochs=30,
    batch_size=16,
    callbacks=[refit_stopper],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fa23c050550>

In [None]:
# Run the truncated network (`submodel`, which stops at the 32-unit dense
# layer) to turn every sample into a feature row.
# Train and validation samples are stacked, in that order, into one array.
labelled_inputs = np.array(X_train + X_val)
labelled_features = submodel.predict(labelled_inputs)
subpredictions = pd.DataFrame(data=labelled_features)

# Same transformation for the unlabelled test samples.
test_features = submodel.predict(np.array(X_test))
Xtest = pd.DataFrame(data=test_features)

In [None]:
# Features are the hidden-layer activations; targets are the train and
# validation speeds concatenated in the same order as the feature rows.
X = subpredictions
y = np.array(y_train + y_val)

from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error

# NOTE: `fold` was an integer counter in the neural K-fold cell; here it is
# rebound to the splitter object.
fold = KFold(5, shuffle=True, random_state=12422)

predictions_cat = []   # one array of test-set predictions per fold
predictions_lgbm = []  # not filled in this cell
predictions_xgb = []   # not filled in this cell

scores = []            # out-of-fold RMSE per fold
# NOTE: this rebinds `predictions`, which previously held the neural network's
# per-fold DataFrames.
predictions = []

for i, (train_idx, test_idx) in enumerate(fold.split(X, y), start=1):

    print(f'FOLD: {i}')

    Xtrain, ytrain = X.iloc[train_idx], y[train_idx]
    Xval, yval = X.iloc[test_idx], y[test_idx]

    model_cat = CatBoostRegressor(iterations=2000, eval_metric='RMSE', od_type='iter')
    model_cat.fit(Xtrain, ytrain,
                      eval_set = (Xval, yval),
                      early_stopping_rounds = 100,
                      verbose = 50)

    # Record the out-of-fold error. These predictions used to be computed and
    # then thrown away, which left `scores` empty.
    labels_cat = model_cat.predict(Xval)
    fold_rmse = np.sqrt(mean_squared_error(yval, labels_cat))
    scores.append(fold_rmse)
    print(f'Fold RMSE: {fold_rmse}')

    prediction_cat = model_cat.predict(Xtest)
    predictions_cat.append(prediction_cat)
    print('---'*40)

print(f'MEAN RMSE: {np.mean(scores)}')

FOLD: 1
Learning rate set to 0.026105
0:	learn: 1.5733539	test: 1.4887402	best: 1.4887402 (0)	total: 2.05ms	remaining: 4.09s
50:	learn: 1.4706414	test: 1.4643814	best: 1.4641307 (46)	total: 59.6ms	remaining: 2.28s
100:	learn: 1.4253798	test: 1.4625991	best: 1.4624396 (98)	total: 115ms	remaining: 2.15s
150:	learn: 1.3884507	test: 1.4606338	best: 1.4606338 (150)	total: 173ms	remaining: 2.11s
200:	learn: 1.3595953	test: 1.4620660	best: 1.4599769 (153)	total: 230ms	remaining: 2.06s
250:	learn: 1.3333062	test: 1.4602530	best: 1.4592085 (244)	total: 285ms	remaining: 1.99s
300:	learn: 1.3075356	test: 1.4589147	best: 1.4581373 (281)	total: 340ms	remaining: 1.92s
350:	learn: 1.2813292	test: 1.4576025	best: 1.4561847 (330)	total: 408ms	remaining: 1.92s
400:	learn: 1.2477094	test: 1.4534525	best: 1.4520334 (383)	total: 465ms	remaining: 1.85s
450:	learn: 1.2050996	test: 1.4488109	best: 1.4467831 (440)	total: 525ms	remaining: 1.8s
500:	learn: 1.1738544	test: 1.4472064	best: 1.4455401 (489)	total: 5

In [None]:
# Average the CatBoost test predictions over however many folds produced one
# (previously a hard-coded 5) and write them out as columns ID, speed.
mean_cat_speed = sum(predictions_cat) / len(predictions_cat)
submissions = pd.DataFrame(data={'ID': id, 'speed': mean_cat_speed})
submissions.to_csv('submission_cat_Densenet.csv', index=False)