**THIS FILE CONTAINS THE CODE FOR MODELLING AND DOING THE HYPERPARAMETER TUNING FOR CATBOOST MODEL.**

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive so the files under /content/drive can be read by later cells.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm
import sklearn

In [None]:
pip install catboost

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/20/37/bc4e0ddc30c07a96482abf1de7ed1ca54e59bba2026a33bca6d2ef286e5b/catboost-0.24.4-cp36-none-manylinux1_x86_64.whl (65.7MB)
[K     |████████████████████████████████| 65.8MB 48kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.24.4


In [None]:
from catboost import CatBoostRegressor

In [None]:
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.model_selection import StratifiedKFold,KFold
import lightgbm as lgb
from lightgbm import LGBMRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error
import xgboost
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from scipy.stats import uniform,randint
from sklearn.model_selection import train_test_split

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the pickled training frame from the mounted Drive folder.
# The context manager closes the file handle as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that you produced yourself.
with open('/content/drive/MyDrive/Project Energy Consumption/df_tr_red_final_modified.txt','rb') as file:
  df_tr_red_final=pickle.load(file)

In [None]:
# Remove the 'index' and 'timestamp' columns from the frame in place.
df_tr_red_final.drop(columns=['index','timestamp'],inplace=True)

In [None]:
# Renumber the rows from 0 in place. The previous index is kept as a new column
# (presumably named 'index', since a later cell drops a column with that name).
df_tr_red_final.reset_index(drop=False,inplace=True)

**TARGET TRANSFORMATION**

1.   AS THE METRIC IS RMSLE, I TAKE THE LOG1P OF THE METER READINGS AND THEN USE RMSE AS THE EVALUATION METRIC (RMSE ON THE LOG1P VALUES IS THE SAME AS RMSLE ON THE ORIGINAL READINGS).



In [None]:
# Detach the target column from the feature frame and move it to log1p space.
# pop() returns the column and removes it from the frame in one step.
y_tr=np.log1p(df_tr_red_final.pop('meter_reading'))

**DROPPING THE FEATURES WHICH ARE NOT IMPORTANT**

In [None]:
# Features removed before modelling (dropped in place).
df_tr_red_final.drop(
    columns=[
        'cloud_coverage',
        'sea_level_pressure',
        'wind_direction',
        'wind_speed',
        'is_summer_month',
        'is_pub_holiday',
    ],
    inplace=True,
)

In [None]:
# Remove the 'index' column that the earlier reset_index call added to the frame.
df_tr_red_final.drop(columns='index',inplace=True)

**DIVIDING THE DATA INTO TRAIN AND TEST**

In [None]:
# Random 80/20 hold-out split of features and log1p target; the seed is fixed so
# the same rows land in each part on every run.
X_train,X_test,y_train,y_test=train_test_split(
    df_tr_red_final,
    y_tr,
    test_size=0.2,
    random_state=0,
)

**HYPERPARAMETER TUNING**

1.   HERE I AM DOING THE HYPERPARAMETER TUNING USING A MANUAL RANDOM SEARCH (10 RANDOMLY SAMPLED MAX_DEPTH / N_ESTIMATORS COMBINATIONS, EACH SCORED ON THE HOLD-OUT SPLIT) WITH THE USE OF GPU.
2.   IT HELPS THE MODEL TO FIND THE BEST PARAMS REQUIRED FOR TRAINING.



In [None]:
# Manual random search over (max_depth, n_estimators) for CatBoost on GPU.
# Each sampled configuration is fitted on the training split and scored with RMSE
# on the hold-out split. The targets are log1p-transformed, so this RMSE is the
# RMSLE of the raw meter readings.
# NOTE(review): the hold-out split is also what selects the configuration, so the
# best score is an optimistic estimate; consider a separate validation split.
rng=np.random.RandomState(0)  # dedicated seeded generator: the same 10 configurations are drawn on every run
params=[]
err_score=[]
for i in range(10):
  max_depth=rng.randint(3,15)  # upper bound is exclusive, so depth is in [3, 14]
  estimators=rng.randint(300,1500)  # number of trees in [300, 1499]

  # verbose=False stops CatBoost printing one log line per boosting iteration,
  # which otherwise floods (and truncates) the cell output.
  cat_reg=CatBoostRegressor(task_type='GPU',loss_function='RMSE',max_depth=max_depth,n_estimators=estimators,learning_rate=0.1,verbose=False)
  cat_reg.fit(X_train,y_train)
  test_pred=cat_reg.predict(X_test)
  err_test=np.sqrt(mean_squared_error(y_test,test_pred))
  err_score.append(err_test)
  params.append((max_depth,estimators))
  

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
350:	learn: 1.2588643	total: 23.9s	remaining: 43.2s
351:	learn: 1.2583791	total: 24s	remaining: 43.2s
352:	learn: 1.2576931	total: 24s	remaining: 43.1s
353:	learn: 1.2570125	total: 24.1s	remaining: 43s
354:	learn: 1.2567989	total: 24.2s	remaining: 42.9s
355:	learn: 1.2565455	total: 24.2s	remaining: 42.9s
356:	learn: 1.2563691	total: 24.3s	remaining: 42.8s
357:	learn: 1.2560409	total: 24.4s	remaining: 42.7s
358:	learn: 1.2557458	total: 24.4s	remaining: 42.7s
359:	learn: 1.2548241	total: 24.5s	remaining: 42.6s
360:	learn: 1.2542891	total: 24.6s	remaining: 42.5s
361:	learn: 1.2539804	total: 24.6s	remaining: 42.5s
362:	learn: 1.2533705	total: 24.7s	remaining: 42.4s
363:	learn: 1.2528535	total: 24.8s	remaining: 42.3s
364:	learn: 1.2522567	total: 24.8s	remaining: 42.2s
365:	learn: 1.2516343	total: 24.9s	remaining: 42.2s
366:	learn: 1.2508621	total: 25s	remaining: 42.1s
367:	learn: 1.2503042	total: 25s	remaining: 42s
368:	learn:

**FINDING THE ERROR SCORE FOR EACH OF THE HYPERPARAMETER COMBINATIONS**

In [None]:
# Hold-out RMSE of each sampled configuration, in the order the configurations were tried.
err_score

[0.8014502674080963,
 0.969339306037929,
 1.1105379937315227,
 0.8017482632418587,
 1.083261361679219,
 0.7968043441249344,
 0.88638072298227,
 1.2696015010888373,
 1.0057081335900901,
 1.3935640553815056]

**PARAMS FOR HYPERPARAMETER TUNING**

In [None]:
# Sampled (max_depth, n_estimators) pairs; position i corresponds to err_score[i].
params

[(13, 1183),
 (10, 467),
 (5, 1456),
 (14, 925),
 (6, 986),
 (13, 1289),
 (10, 756),
 (4, 928),
 (8, 696),
 (3, 695)]

**BEST PARAMS**

In [None]:
# Select the (max_depth, n_estimators) pair with the lowest hold-out RMSE.
# Looking the position up from err_score keeps the choice correct when the search
# is re-run, unlike a list index copied by hand from a previous output.
best_params=params[int(np.argmin(err_score))]

**FITTING THE MODEL WITH THE BEST PARAMS**

In [None]:
# Refit on the complete data set (training and hold-out rows together) using the
# selected configuration. The values are taken from best_params rather than being
# retyped, so the final model cannot drift from the search result.
best_depth,best_estimators=best_params  # (max_depth, n_estimators)
cat_reg_final=CatBoostRegressor(max_depth=best_depth,n_estimators=best_estimators,task_type='GPU',learning_rate=0.1).fit(df_tr_red_final,y_tr)

0:	learn: 1.9908307	total: 174ms	remaining: 3m 43s
1:	learn: 1.9898607	total: 214ms	remaining: 2m 17s
2:	learn: 1.9137988	total: 382ms	remaining: 2m 43s
3:	learn: 1.9112792	total: 431ms	remaining: 2m 18s
4:	learn: 1.8423924	total: 592ms	remaining: 2m 31s
5:	learn: 1.8418948	total: 632ms	remaining: 2m 15s
6:	learn: 1.7837707	total: 801ms	remaining: 2m 26s
7:	learn: 1.7741040	total: 879ms	remaining: 2m 20s
8:	learn: 1.7245129	total: 1.04s	remaining: 2m 28s
9:	learn: 1.7242654	total: 1.08s	remaining: 2m 18s
10:	learn: 1.6820146	total: 1.25s	remaining: 2m 24s
11:	learn: 1.6811078	total: 1.29s	remaining: 2m 16s
12:	learn: 1.6434450	total: 1.45s	remaining: 2m 22s
13:	learn: 1.6349142	total: 1.56s	remaining: 2m 21s
14:	learn: 1.6037291	total: 1.73s	remaining: 2m 26s
15:	learn: 1.6036562	total: 1.77s	remaining: 2m 20s
16:	learn: 1.5805302	total: 1.93s	remaining: 2m 24s
17:	learn: 1.5804832	total: 1.97s	remaining: 2m 19s
18:	learn: 1.5562731	total: 2.15s	remaining: 2m 23s
19:	learn: 1.5562444	t

**STORING THE BEST MODEL IN THE FORM OF PICKLE FILE AND THEN DUMPING IT INTO DRIVE**

In [None]:
# Serialise the fitted model with pickle. The context manager flushes and closes
# the file once the dump finishes, so the pickle on disk is complete.
# NOTE(review): the file name is relative, so the file is written to the current
# working directory; confirm whether a path under /content/drive was intended.
filename='catboost_model.txt'
with open(filename,'wb') as my_file:
  pickle.dump(cat_reg_final,my_file)