# Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import SGD

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer

# Data

In [None]:
# Load the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Report (rows, columns) of each table as a quick sanity check.
print('train shape:', train.shape)
print('test shape:', test.shape)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Train and validation data
# Shuffle the rows once with a fixed seed, then cut at the 80% mark.
# Positional slicing is used instead of np.split: np.split sends a DataFrame
# through DataFrame.swapaxes, which pandas has deprecated.
shuffled = train.sample(frac=1, random_state=42)
split_at = int(.8 * len(train))
X_train = shuffled.iloc[:split_at]
X_valid = shuffled.iloc[split_at:]

# Extract the target column before it is removed from the feature frames.
y_train = X_train['loss'].values
y_valid = X_valid['loss'].values

# Drop the target and the 'id' column so only the remaining columns are used as inputs.
X_train = X_train.drop(columns=['loss', 'id'])
X_valid = X_valid.drop(columns=['loss', 'id'])


# Test data
X_test = test.drop(columns=['id'])

print('Train set:', X_train.shape)
print('Validation set:', X_valid.shape)
print('Test set:', X_test.shape)

In [None]:
#preprocess data

# Columns to standardise.
# NOTE(review): only the first 99 columns are selected, and
# make_column_transformer drops every column that is not listed
# (remainder='drop' by default) — confirm no feature is left out unintentionally.
features_num = list(X_train.columns[0:99])

preprocessor = make_column_transformer(
    (StandardScaler(), features_num)
)

# Fit the scaler on the training split only, then apply those same statistics
# to the validation and test sets. Re-fitting on the test set would scale it
# with a different mean/variance than the data the model was trained on.
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)
X_test = preprocessor.transform(X_test)

# Model

In [None]:
# Width of the input layer: one unit per preprocessed feature column.
input_shape = [X_train.shape[1]]

# The network starts with a batch-normalisation layer on the raw inputs,
# followed by three hidden stages of decreasing width. Each stage is
# Dense(relu, L2-regularised) -> Dropout -> BatchNormalization.
network_layers = [layers.BatchNormalization(input_shape=input_shape)]
for hidden_units in (150, 50, 25):
    network_layers.append(
        layers.Dense(
            hidden_units,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.01),
        )
    )
    network_layers.append(layers.Dropout(0.4))
    network_layers.append(layers.BatchNormalization())

# Single linear output unit for the predicted value.
network_layers.append(layers.Dense(1))

model = keras.Sequential(network_layers)

In [None]:
# Compile with plain SGD and mean-squared-error loss; RMSE is tracked as a
# metric so the training history carries it for every epoch.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and not accepted by current Keras optimizers.
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='mse',
    metrics=[keras.metrics.RootMeanSquaredError()],
)

# Stop once the monitored validation loss stops improving and roll the model
# back to the best weights seen during training.
early_stopping = callbacks.EarlyStopping(
    min_delta=0.001, # minimum amount of change to count as an improvement
    patience=20, # how many epochs to wait before stopping
    restore_best_weights=True,
)

# Train for at most 100 epochs; early stopping may end the run sooner.
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
    verbose=1
)


# Model performance

In [None]:
#RMSE
# Score the fitted model on the training and validation splits.
predicted_train = model.predict(X_train)
predicted_valid = model.predict(X_valid)

# Take the square root of the MSE explicitly. The `squared=False` shortcut of
# mean_squared_error is deprecated in scikit-learn and absent from newer
# releases; for a single output column both forms give the same number.
rmse_train = np.sqrt(metrics.mean_squared_error(y_train, predicted_train))
rmse_valid = np.sqrt(metrics.mean_squared_error(y_valid, predicted_valid))
print('Training RMSE: ', rmse_train)
print('Validation RMSE: ', rmse_valid)

In [None]:
#Loss curves
# Newer Matplotlib releases ship the seaborn style sheets under a
# 'seaborn-v0_8-' prefix and no longer accept the bare name. Use whichever
# spelling this installation provides; if neither exists, keep the current style.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('animation', html='html5')

# One row per epoch, one column per logged quantity.
history_df = pd.DataFrame(history.history)

# Plot training vs. validation RMSE across epochs.
rmse_columns = ['root_mean_squared_error', 'val_root_mean_squared_error']
ax = history_df.loc[0:, rmse_columns].plot()
ax.set(xlabel='Epoch', ylabel='RMSE')

print(f"Minimum Root Mean Squared Error: {history_df['root_mean_squared_error'].min():0.4f}")
print(f"Minimum Validation Root Mean Squared Error: {history_df['val_root_mean_squared_error'].min():0.4f}")

# Prediction

In [None]:
# Run the trained model on the preprocessed test features.
y_pred = model.predict(x=X_test)

# Submission

In [None]:
# Start from the sample submission file and overwrite its 'loss' column with
# the model predictions.
preds = pd.read_csv("sample_submission.csv")

# Bracket assignment always writes a DataFrame column; attribute assignment
# (`preds.loss = ...`) would only set a plain Python attribute if the column
# were missing. The predictions are flattened to 1-D before being stored.
preds['loss'] = y_pred.ravel()
preds.head()

In [None]:
# Write the submission table to disk without the DataFrame index column.
preds.to_csv(path_or_buf='submission14.csv', index=False)