In [1]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import lightgbm as lgbm
from lightgbm import LGBMRegressor
import xgboost as xgb
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

In [7]:
data = pd.read_csv('merged_iitp2.csv')
water = data.drop(['ELEC_diff','ELEC_before','ELEC','HOT_diff','HOT_before','HOT','WATER_diff','WATER_before'], axis=1)
elec = data.drop(['ELEC_diff','HOT_diff','HOT_before','HOT','WATER_diff','WATER_before','ELEC_before','WATER'], axis=1)
hot = data.drop(['ELEC_diff','ELEC_before','ELEC','HOT_diff','WATER_diff','WATER_before','HOT_before','WATER'], axis=1)

In [8]:
water = water.drop(['YEAR'], axis=1)
elec = elec.drop(['YEAR'], axis=1)
hot = hot.drop(['YEAR'], axis=1)

##  For Scaling

In [10]:
water_X = water.drop(['WATER'], axis=1)
water_Y = water[['WATER']]

scaler_water = MinMaxScaler()
scaler_water.fit(water_X)
scaled_water_X = scaler_water.transform(water_X)

new_water_X = pd.DataFrame(scaled_water_X, index=water_X.index, columns=water_X.columns)
new_water = pd.concat([new_water_X, water_Y], axis=1)

In [11]:
elec_X = elec.drop(['ELEC'], axis=1)
elec_Y = elec[['ELEC']]

scaler_elec = MinMaxScaler()
scaler_elec.fit(elec_X)
scaled_elec_X = scaler_elec.transform(elec_X)

new_elec_X = pd.DataFrame(scaled_elec_X, index=elec_X.index, columns=elec_X.columns)
new_elec = pd.concat([new_elec_X, elec_Y], axis=1)

In [12]:
hot_X = hot.drop(['HOT'], axis=1)
hot_Y = hot[['HOT']]

scaler_hot = MinMaxScaler()
scaler_hot.fit(hot_X)
scaled_hot_X = scaler_hot.transform(hot_X)

new_hot_X = pd.DataFrame(scaled_hot_X, index=hot_X.index, columns=hot_X.columns)
new_hot = pd.concat([new_hot_X, hot_Y], axis=1)

In [13]:
input_water = new_water.iloc[:-1,]
target_water = new_water[['WATER']].iloc[1:]

trainX_water, testX_water, trainY_water, testY_water = train_test_split(input_water,target_water,test_size=0.3,shuffle=False,random_state=0)

In [14]:
input_elec = new_elec.iloc[:-1,]
target_elec = new_elec[['ELEC']].iloc[1:]

trainX_elec, testX_elec, trainY_elec, testY_elec = train_test_split(input_elec,target_elec,test_size=0.3,shuffle=False,random_state=0)

In [15]:
input_hot = new_hot.iloc[:-1,]
target_hot = new_hot[['HOT']].iloc[1:]

trainX_hot, testX_hot, trainY_hot, testY_hot = train_test_split(input_hot,target_hot,test_size=0.3,shuffle=False,random_state=0)

## SVR

In [16]:
model_svr_water = SVR(kernel='rbf', gamma='auto')
model_svr_water.fit(trainX_water, trainY_water)

model_svr_elec = SVR(kernel='rbf', gamma='auto')
model_svr_elec.fit(trainX_elec, trainY_elec)

model_svr_hot = SVR(kernel='rbf', gamma='auto')
model_svr_hot.fit(trainX_hot, trainY_hot)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [17]:
model_svr_pred_water = model_svr_water.predict(testX_water)
model_svr_preds_water = np.reshape(model_svr_pred_water,(-1,1))
svr_water_actual = testY_water.to_numpy()
svr_water_actual = np.reshape(svr_water_actual,(-1,1))

In [18]:
model_svr_pred_elec = model_svr_elec.predict(testX_elec)
model_svr_preds_elec = np.reshape(model_svr_pred_elec,(-1,1))
svr_elec_actual = testY_elec.to_numpy()
svr_elec_actual = np.reshape(svr_elec_actual,(-1,1))

In [19]:
model_svr_pred_hot = model_svr_hot.predict(testX_hot)
model_svr_preds_hot = np.reshape(model_svr_pred_hot,(-1,1))
svr_hot_actual = testY_hot.to_numpy()
svr_hot_actual = np.reshape(svr_hot_actual,(-1,1))

In [20]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(svr_water_actual, model_svr_preds_water))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(svr_water_actual, model_svr_preds_water))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(svr_water_actual, model_svr_preds_water))

mean_absolute_error :  1.3931250677908815
mean_squared_error :  3.5386368930328276
r2_score :  0.7333137080848608


In [21]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(svr_elec_actual, model_svr_preds_elec))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(svr_elec_actual, model_svr_preds_elec))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(svr_elec_actual, model_svr_preds_elec))

mean_absolute_error :  18.69986089298566
mean_squared_error :  715.2368533167913
r2_score :  0.613199913153722


In [22]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(svr_hot_actual, model_svr_preds_hot))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(svr_hot_actual, model_svr_preds_hot))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(svr_hot_actual, model_svr_preds_hot))

mean_absolute_error :  0.7326609170599803
mean_squared_error :  1.1069285271135296
r2_score :  0.5930169292534612


## XGBoost

In [23]:
xgb_model_water = xgb.XGBRegressor(n_estimators=200, learning_rate=0.01)
xgb_model_elec = xgb.XGBRegressor(n_estimators=200, learning_rate=0.01)
xgb_model_hot = xgb.XGBRegressor(n_estimators=200, learning_rate=0.01)

In [24]:
xgb_model_water.fit(trainX_water, trainY_water)
xgb_model_elec.fit(trainX_elec, trainY_elec)
xgb_model_hot.fit(trainX_hot, trainY_hot)

In [25]:
xgb_model_pred_water = xgb_model_water.predict(testX_water)
xgb_model_preds_water = np.reshape(xgb_model_pred_water,(-1,1))
xgb_water_actual = testY_water.to_numpy()
xgb_water_actual = np.reshape(xgb_water_actual,(-1,1))

In [26]:
xgb_model_pred_elec = xgb_model_elec.predict(testX_elec)
xgb_model_preds_elec = np.reshape(xgb_model_pred_elec,(-1,1))
xgb_elec_actual = testY_elec.to_numpy()
xgb_elec_actual = np.reshape(xgb_elec_actual,(-1,1))

In [27]:
xgb_model_pred_hot = xgb_model_hot.predict(testX_hot)
xgb_model_preds_hot = np.reshape(xgb_model_pred_hot,(-1,1))
xgb_hot_actual = testY_hot.to_numpy()
xgb_hot_actual = np.reshape(xgb_hot_actual,(-1,1))

In [28]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(xgb_water_actual, xgb_model_preds_water))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(xgb_water_actual, xgb_model_preds_water))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(xgb_water_actual, xgb_model_preds_water))

mean_absolute_error :  1.205262487014817
mean_squared_error :  2.791157128080076
r2_score :  0.7896468704924908


In [29]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(xgb_elec_actual, xgb_model_preds_elec))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(xgb_elec_actual, xgb_model_preds_elec))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(xgb_elec_actual, xgb_model_preds_elec))

mean_absolute_error :  20.547817025308873
mean_squared_error :  632.6902513703582
r2_score :  0.6578411150907866


In [30]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(xgb_hot_actual, xgb_model_preds_hot))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(xgb_hot_actual, xgb_model_preds_hot))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(xgb_hot_actual, xgb_model_preds_hot))

mean_absolute_error :  0.5828764108744584
mean_squared_error :  0.6649049796484141
r2_score :  0.7555351915289266


## LightGBM

In [31]:
lgbm_model= LGBMRegressor(
    learning_rate=0.01
    , n_estimators=200
)

lgbm_model_water = lgbm_model.fit(trainX_water, trainY_water, eval_metric='mae', verbose=0)
lgbm_model_elec = lgbm_model.fit(trainX_elec, trainY_elec, eval_metric='mae', verbose=0)
lgbm_model_hot = lgbm_model.fit(trainX_hot, trainY_hot, eval_metric='mae', verbose=0)



In [32]:
lgbm_model_pred_water = lgbm_model_water.predict(testX_water)
lgbm_model_preds_water = np.reshape(lgbm_model_pred_water,(-1,1))
lgbm_water_actual = testY_water.to_numpy()
lgbm_water_actual = np.reshape(lgbm_water_actual,(-1,1))

In [33]:
lgbm_model_pred_elec = lgbm_model_elec.predict(testX_elec)
lgbm_model_preds_elec = np.reshape(lgbm_model_pred_elec,(-1,1))
lgbm_elec_actual = testY_elec.to_numpy()
lgbm_elec_actual = np.reshape(lgbm_elec_actual,(-1,1))

In [34]:
lgbm_model_pred_hot = lgbm_model_hot.predict(testX_hot)
lgbm_model_preds_hot = np.reshape(lgbm_model_pred_hot,(-1,1))
lgbm_hot_actual = testY_hot.to_numpy()
lgbm_hot_actual = np.reshape(lgbm_hot_actual,(-1,1))

In [36]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(lgbm_water_actual, lgbm_model_preds_water))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(lgbm_water_actual, lgbm_model_preds_water))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(lgbm_water_actual, lgbm_model_preds_water))

mean_absolute_error :  3.474307491281002
mean_squared_error :  19.35826803843533
r2_score :  -0.45891903496349484


In [37]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(lgbm_elec_actual, lgbm_model_preds_elec))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(lgbm_elec_actual, lgbm_model_preds_elec))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(lgbm_elec_actual, lgbm_model_preds_elec))

mean_absolute_error :  131.50344009223954
mean_squared_error :  19203.68084741694
r2_score :  -9.385350510258299


In [38]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(lgbm_hot_actual, lgbm_model_preds_hot))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(lgbm_hot_actual, lgbm_model_preds_hot))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(lgbm_hot_actual, lgbm_model_preds_hot))

mean_absolute_error :  0.6009917818666439
mean_squared_error :  0.6065938090735377
r2_score :  0.7769743889821461


## Random Forest

In [39]:
rf_water = RandomForestRegressor(random_state=0, max_depth=5, min_samples_leaf=8, min_samples_split=8,n_estimators=200)
rf_water.fit(trainX_water, trainY_water)

  rf_water.fit(trainX_water, trainY_water)


In [40]:
rf_elec = RandomForestRegressor(random_state=0, max_depth=5, min_samples_leaf=8, min_samples_split=8,n_estimators=200)
rf_elec.fit(trainX_elec, trainY_elec)

  rf_elec.fit(trainX_elec, trainY_elec)


In [41]:
rf_hot = RandomForestRegressor(random_state=0, max_depth=5, min_samples_leaf=8, min_samples_split=8,n_estimators=200)
rf_hot.fit(trainX_hot, trainY_hot)

  rf_hot.fit(trainX_hot, trainY_hot)


In [42]:
water_predict = rf_water.predict(testX_water)
water_actual = testY_water

elec_predict = rf_elec.predict(testX_elec)
elec_actual = testY_elec

hot_predict = rf_hot.predict(testX_hot)
hot_actual = testY_hot

In [43]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(water_actual, water_predict))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(water_actual, water_predict))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(water_actual, water_predict))

mean_absolute_error :  1.0774529204121255
mean_squared_error :  2.2391856445999854
r2_score :  0.8312457212991475


In [44]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(elec_actual, elec_predict))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(elec_actual, elec_predict))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(elec_actual, elec_predict))

mean_absolute_error :  11.899489262490864
mean_squared_error :  244.82622765849612
r2_score :  0.867597977255502


In [45]:
from sklearn.metrics import mean_absolute_error
print('mean_absolute_error : ', mean_absolute_error(hot_actual, hot_predict))                                      

from sklearn.metrics import mean_squared_error
print('mean_squared_error : ', mean_squared_error(hot_actual, hot_predict))

from sklearn.metrics import r2_score   # model.score
print('r2_score : ', r2_score(hot_actual, hot_predict))

mean_absolute_error :  0.6403620979484479
mean_squared_error :  0.7285097509946535
r2_score :  0.7321497022922135
