**This notebook contains the code for building deep neural networks and generating predictions on the test set.**

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive so the pickled data frames under MyDrive can be read.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPool2D
from keras.layers import Dense
from keras.layers import Flatten
import tensorflow as tf
import pandas as pd

In [None]:
import pickle
import numpy as np

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score

In [None]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings and any pandas
# chained-assignment warnings raised by later cells.
warnings.filterwarnings('ignore')

In [None]:
# Load the pickled training frame. The context manager closes the handle as soon as
# loading finishes instead of leaving it open for the rest of the session.
# NOTE(review): pickle.load can execute arbitrary code - only load files you produced yourself.
with open('/content/drive/MyDrive/Project Energy Consumption/df_tr_red_final_modified.txt','rb') as file:
    df_tr_red_final=pickle.load(file)

In [None]:
# Move the current index into a column and replace it with a fresh default index.
df_tr_red_final = df_tr_red_final.reset_index()

In [None]:
# Discard the 'index' and 'timestamp' columns from the feature frame.
df_tr_red_final = df_tr_red_final.drop(columns=['index', 'timestamp'])

In [None]:
# Discard the 'level_0' column as well.
df_tr_red_final = df_tr_red_final.drop(columns=['level_0'])

**Target Transformation**

1.   Here I take `log1p` of the meter readings and then evaluate my base models on RMSE, which on the log-transformed target is equivalent to RMSLE (the metric the models are to be evaluated on).



In [None]:
# pop() removes 'meter_reading' from the feature frame and returns it, so the target
# is extracted and dropped in a single step before the log1p transform is applied.
y_tr = np.log1p(df_tr_red_final.pop('meter_reading'))

**Dropping the features which are not important**

In [None]:
# Remove the six features listed as not important for the model.
df_tr_red_final = df_tr_red_final.drop(
    columns=[
        'cloud_coverage',
        'sea_level_pressure',
        'wind_direction',
        'wind_speed',
        'is_summer_month',
        'is_pub_holiday',
    ]
)

**Dividing the data into train and test**

In [None]:
# 80/20 hold-out split of features and log1p targets, seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    df_tr_red_final,
    y_tr,
    test_size=0.2,
    random_state=1,
)

**I will be building multiple models with different layers to see which one performs the best.**

**For my first model I will use the Keras regressor wrapper (`KerasRegressor`) together with `cross_val_score` for validation.**

**Making a custom loss function to evaluate my model**

In [None]:
from keras import backend as K

In [None]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions, built from backend ops.

    Used as the Keras loss for every network in this notebook.
    """
    squared_error = K.square(y_true - y_pred)
    mean_squared = K.mean(squared_error)
    return K.sqrt(mean_squared)

**Building my 1st neural network**

In [None]:
def nn_model(learning_rate=0.001):
    """Build and compile the first MLP: four 128-unit ReLU layers and one linear output.

    The input width is read from the notebook-level ``X_train`` and the network is
    compiled with the notebook's ``rmse`` loss, so both must exist before this is called.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size passed to Adam. The previous hard-coded value of 0.1 is two orders
        of magnitude above Adam's default; every network in this notebook scored
        about 2.08 on the hold-out split and the submission contains one constant
        value, which is what a collapsed network produces.
        NOTE(review): retrain to confirm the new scores.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
    # Three further hidden layers of the same width.
    for _ in range(3):
        model.add(Dense(128, activation='relu'))
    # Single linear unit: the regression output on the log1p scale.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss=rmse)
    return model

In [None]:
# Wrap the model factory so scikit-learn utilities can train it (5 epochs per fit).
model_reg=KerasRegressor(build_fn=nn_model,epochs=5)
# random_state only has an effect when shuffling is enabled; recent scikit-learn
# releases raise ValueError for a seed combined with the default shuffle=False.
kf=KFold(n_splits=3,shuffle=True,random_state=42)

In [None]:
# 3-fold cross-validation of the wrapped regressor on the training split,
# using all available cores and verbose progress logging.
cross_val_score(
    estimator=model_reg,
    X=X_train,
    y=y_train,
    cv=kf,
    n_jobs=-1,
    verbose=15,
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 90.7min
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed: 91.0min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed: 91.0min finished


In [None]:
# Fit the wrapped regressor on the whole training split (epochs=5 was set on the wrapper).
model_reg.fit(X_train,y_train)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f4dc976f588>

In [None]:
# Predict log1p-scale readings for the 20% hold-out split.
test_pred=model_reg.predict(X_test)

**Test Score from my first model**

In [None]:
# Hold-out RMSE; targets and predictions are both on the log1p scale.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=test_pred))

2.0856028

**Building my 2nd neural network**

In [None]:
# Second architecture: 128 -> 64 -> 32 -> 16 ReLU layers and one linear output unit.
model_1 = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1),
])
# Adam's default step size (0.001) replaces the previous 0.1. With 0.1 every network in
# this notebook scored about 2.08 on the hold-out split regardless of architecture,
# which suggests the optimiser was diverging rather than fitting.
# NOTE(review): retrain and re-record the score below.
model_1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=rmse)

In [None]:
# Each batch holds one tenth of the training rows, i.e. roughly ten weight updates per epoch.
# The hold-out split is passed as validation data for per-epoch monitoring.
model_1.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=X_train.shape[0] // 10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4dbca85198>

In [None]:
# Predict log1p-scale readings for the hold-out split with the second network.
test_pred_1=model_1.predict(X_test)

**Test Score for my 2nd model**

In [None]:
# Hold-out RMSE for the second network (log1p scale).
np.sqrt(mean_squared_error(y_true=y_test, y_pred=test_pred_1))

2.083157

**Building my 3rd neural network**

In [None]:
# Third architecture: 256 -> 128 -> 128 -> 64 ReLU layers and one linear output unit.
model_2 = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1),
])
# Adam's default step size (0.001) replaces the previous 0.1. With 0.1 every network in
# this notebook scored about 2.08 on the hold-out split regardless of architecture,
# which suggests the optimiser was diverging rather than fitting.
# NOTE(review): retrain and re-record the score below.
model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=rmse)

In [None]:
# Each batch holds one tenth of the training rows, i.e. roughly ten weight updates per epoch.
# The hold-out split is passed as validation data for per-epoch monitoring.
model_2.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=X_train.shape[0] // 10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4dbcadca20>

In [None]:
# Predict log1p-scale readings for the hold-out split with the third network.
test_pred_2=model_2.predict(X_test)

**Test Score on my 3rd base model**

In [None]:
# Hold-out RMSE for the third network (log1p scale).
np.sqrt(mean_squared_error(y_true=y_test, y_pred=test_pred_2))

2.0875163

**Building my 4th neural network model**

In [None]:
# Fourth architecture: 256 -> 128 -> 64 -> 32 ReLU layers and one linear output unit.
model_3 = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),
])
# Adam's default step size (0.001) replaces the previous 0.1. The submission recorded at
# the end of this notebook holds the same prediction on every row, which is what a
# collapsed network produces.
# NOTE(review): retrain and re-record the score below.
model_3.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=rmse)

In [None]:
# Each batch holds one tenth of the training rows, i.e. roughly ten weight updates per epoch.
# The hold-out split is passed as validation data for per-epoch monitoring.
model_3.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=X_train.shape[0] // 10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4dbb7b75f8>

In [None]:
# Predict log1p-scale readings for the hold-out split with the fourth network.
test_pred_3=model_3.predict(X_test)

**Test Score on my 4th model**

In [None]:
# Hold-out RMSE for the fourth network (log1p scale).
np.sqrt(mean_squared_error(y_true=y_test, y_pred=test_pred_3))

2.0793393

**Predictions on my test dataset**

In [None]:
# Load the pickled test frame. The context manager closes the handle as soon as
# loading finishes instead of leaving it open for the rest of the session.
# NOTE(review): pickle.load can execute arbitrary code - only load files you produced yourself.
with open('/content/drive/MyDrive/Project Energy Consumption/df_te_red_final_modified.txt','rb') as file_1:
    df_te_red_final=pickle.load(file_1)

**Dropping the features which are not important**

In [None]:
# Remove the same six features that were dropped from the training frame.
df_te_red_final = df_te_red_final.drop(
    columns=[
        'cloud_coverage',
        'sea_level_pressure',
        'wind_direction',
        'wind_speed',
        'is_summer_month',
        'is_pub_holiday',
    ]
)

In [None]:
# row_id is an identifier, not a model input, so it is removed before predicting.
df_te_red_final = df_te_red_final.drop(columns=['row_id'])

**The 4th neural network model is used as it performs best on the test set obtained from 80-20 split.**

In [None]:
# Predict log1p-scale readings for the submission frame with the fourth network.
# NOTE(review): this rebinds y_test, which until here held the hold-out targets from
# train_test_split; re-running the score cells above after this point would no longer
# compare against those targets.
y_test=model_3.predict(df_te_red_final)

**Converting the predicted readings back to the original scale with `expm1`, since the target variable was log-transformed with `log1p`.**

In [None]:
# Invert the log1p target transform. Not idempotent: re-running this cell applies
# expm1 a second time to the already converted values.
y_test=np.expm1(y_test)

In [None]:
# Keep four decimal places in the submitted readings.
y_test_round = np.round(y_test, decimals=4)

In [None]:
# Wrap the rounded predictions in a frame; the single column gets the default label 0.
y_test_df = pd.DataFrame(data=y_test_round)

In [None]:
# Give the prediction column its submission name.
y_test_df = y_test_df.rename(columns={0: 'meter_reading'})

In [None]:
# NOTE(review): row_id is rebuilt from the positional index, which assumes the test
# frame rows were already in row_id order - confirm.
y_test_df = y_test_df.assign(row_id=y_test_df.index)

In [None]:
# Put the columns in submission order: identifier first, prediction second.
y_test_df = y_test_df.loc[:, ['row_id', 'meter_reading']]

In [None]:
# Store row_id as an integer column.
y_test_df = y_test_df.astype({'row_id': int})

In [None]:
# Floor negative predictions at zero with one boolean-mask .loc assignment.
# The previous per-row loop wrote through chained indexing
# (y_test_df['meter_reading'][i] = 0), which pandas does not guarantee to write back
# to the frame, and it iterated in Python over every offending row.
y_test_df.loc[y_test_df['meter_reading']<0,'meter_reading']=0

In [None]:
# Write the submission file with a header row and without the frame index.
y_test_df.to_csv(path_or_buf='mlp_model.csv', header=True, index=False)

In [None]:
from google.colab import files
# Trigger a browser download of the submission file. The call's result is not
# assigned back to `files`, so the imported module is no longer overwritten.
files.download('/content/mlp_model.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

**Predicted meter readings on the final test set**

In [None]:
# Display the submission frame (pandas truncates the view to the first and last rows).
y_test_df

Unnamed: 0,row_id,meter_reading
0,0,66.7966
1,1,66.7966
2,2,66.7966
3,3,66.7966
4,4,66.7966
...,...,...
41697595,41697595,66.7966
41697596,41697596,66.7966
41697597,41697597,66.7966
41697598,41697598,66.7966


In [None]:
#REF-->https://stackoverflow.com/questions/43855162/rmse-rmsle-loss-function-in-keras/43863854

                                                             **End of Notebook**