In [23]:
# importing all the associated libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [3]:
# importing the dataset

raw_energy_data= pd.read_csv('energydata_complete_ (Appliances energy prediction Data Set).csv')
raw_energy_data.head(2)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195


In [4]:
# changing the names of the columns

column_names= ({'T1': 'Temp_kitc', 'RH_1': 'Humi_kitc', 'T2': 'Temp_livi', 'RH_2': 'Humi_livi', 'T3': 'Temp_laun',
                'RH_3': 'Humi_laun', 'T4': 'Temp_offi', 'RH_4': 'Humi_offi', 'T5': 'Temp_bath', 'RH_5': 'Humi_bath',
                'T6': 'Temp_out','RH_6': 'Humi_out', 'T7': 'Temp_iron', 'RH_7': 'Humi_iron', 'T8': 'Temp_teen', 'RH_8': 'Humi_teen',
                'T9': 'Temp_pare', 'RH_9': 'Humi_pare', 'T_out': 'Temp_out_Chev', 'Press_mm_hg': 'Press_out_Chev',
                'RH_out': 'Humi_out_Chev'})

energy_data= raw_energy_data.rename(columns= column_names)
energy_data.head(2)

Unnamed: 0,date,Appliances,lights,Temp_kitc,Humi_kitc,Temp_livi,Humi_livi,Temp_laun,Humi_laun,Temp_offi,...,Temp_pare,Humi_pare,Temp_out_Chev,Press_out_Chev,Humi_out_Chev,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195


In [5]:
# removing ['dates', 'lights'] columns

new_data= energy_data.drop(columns= ['date', 'lights'])
new_data.head(2)

Unnamed: 0,Appliances,Temp_kitc,Humi_kitc,Temp_livi,Humi_livi,Temp_laun,Humi_laun,Temp_offi,Humi_offi,Temp_bath,...,Temp_pare,Humi_pare,Temp_out_Chev,Press_out_Chev,Humi_out_Chev,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,60,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,17.166667,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,60,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,17.166667,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195


In [6]:
# normalizing the dataset using MinMaxScaler

scaler= MinMaxScaler()
df= pd.DataFrame(scaler.fit_transform(new_data), columns= new_data.columns)
df.head(2)

Unnamed: 0,Appliances,Temp_kitc,Humi_kitc,Temp_livi,Humi_livi,Temp_laun,Humi_laun,Temp_offi,Humi_offi,Temp_bath,...,Temp_pare,Humi_pare,Temp_out_Chev,Press_out_Chev,Humi_out_Chev,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,0.046729,0.32735,0.566187,0.225345,0.684038,0.215188,0.746066,0.351351,0.764262,0.175506,...,0.223032,0.67729,0.37299,0.097674,0.894737,0.5,0.953846,0.538462,0.265449,0.265449
1,0.046729,0.32735,0.541326,0.225345,0.68214,0.215188,0.748871,0.351351,0.782437,0.175506,...,0.2265,0.678532,0.369239,0.1,0.894737,0.47619,0.894872,0.533937,0.372083,0.372083


In [8]:
# separating out the features from the target variable ('Appliances')

x= df.drop(columns= 'Appliances')
y= df['Appliances']
x.head(2)

Unnamed: 0,Temp_kitc,Humi_kitc,Temp_livi,Humi_livi,Temp_laun,Humi_laun,Temp_offi,Humi_offi,Temp_bath,Humi_bath,...,Temp_pare,Humi_pare,Temp_out_Chev,Press_out_Chev,Humi_out_Chev,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,0.32735,0.566187,0.225345,0.684038,0.215188,0.746066,0.351351,0.764262,0.175506,0.381691,...,0.223032,0.67729,0.37299,0.097674,0.894737,0.5,0.953846,0.538462,0.265449,0.265449
1,0.32735,0.541326,0.225345,0.68214,0.215188,0.748871,0.351351,0.782437,0.175506,0.381691,...,0.2265,0.678532,0.369239,0.1,0.894737,0.47619,0.894872,0.533937,0.372083,0.372083


#### Q12.

In [9]:
# Temp_livi is the temperature of the living room while Temp_out is temperature outside the building

x1= x['Temp_livi']
x2= x['Temp_out']

In [11]:
# concatenating both features

x_total= pd.concat([x1, x2], axis= 1)
x_total.head(3)

Unnamed: 0,Temp_livi,Temp_out
0,0.225345,0.38107
1,0.225345,0.375443
2,0.225345,0.367487


In [13]:
# initialising LinearRegression and fitting it on the variables

regressor= LinearRegression()
regressor.fit(x_total, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [14]:
# r-square value

r_sq= regressor.score(x_total, y)
round(r_sq, 2)

0.02

#### Q13.

In [15]:
# splitting dataset into trains and tests

x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.3, random_state= 42)

In [16]:
# fitting regression algorithm on trains

regressor= LinearRegression()
regressor.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [18]:
# predicting new values of y('Appliances')

y_pred= regressor.predict(x_test)

In [19]:
# comparing the predicted values(y_pred) with the actual values(y_test)

df1= pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
df1.head(3)

Unnamed: 0,Actual,Predicted
8980,0.028037,0.033222
2754,0.074766,0.244116
9132,0.037383,0.034


In [20]:
# Mean_absolute_error

mse= mean_absolute_error(y_test, y_pred)
round(mse, 3)

0.05

In [21]:
# Sum of squared error

rss= np.sum(np.square(y_test - y_pred))
round(rss, 2)

45.35

In [24]:
# Root mean squared error

rmse= np.sqrt(mean_squared_error(y_test, y_pred))
round(rmse, 3)

0.088

In [25]:
# coefficient of determination

r2= r2_score(y_test, y_pred)
round(r2, 2)

0.15

#### Q.17

In [27]:
# Ridge method

ridge_reg= Ridge(alpha= 0.4)
ridge_reg.fit(x_train, y_train)
y_pred2= ridge_reg.predict(x_test)

In [28]:
# lasso method

lasso_reg= Lasso(alpha= 0.001)
lasso_reg.fit(x_train, y_train)
y_pred3= lasso_reg.predict(x_test)

In [31]:
# summary of all penalization techniques

def get_weights_df(model, feat, col_name):
    #this function returns the weight of every feature
    weights= pd.Series(model.coef_, feat.columns).sort_values()
    weights_df= pd.DataFrame(weights).reset_index()
    weights_df.columns= ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

linear_model_weights= get_weights_df(regressor, x_train, 'regressor_Weight')
ridge_weights_df= get_weights_df(ridge_reg, x_train, 'Ridge_weight')
lasso_weights_df= get_weights_df(lasso_reg, x_train, 'Lasso_weight')

final_weights= pd.merge(linear_model_weights, ridge_weights_df, on= 'Features')
final_weights= pd.merge(final_weights, lasso_weights_df, on= 'Features')
final_weights

Unnamed: 0,Features,regressor_Weight,Ridge_weight,Lasso_weight
0,Humi_livi,-0.456698,-0.411071,-0.0
1,Temp_out_Chev,-0.32186,-0.262172,0.0
2,Temp_livi,-0.236178,-0.201397,0.0
3,Temp_pare,-0.189941,-0.188916,-0.0
4,Humi_teen,-0.157595,-0.15683,-0.00011
5,Humi_out_Chev,-0.077671,-0.054724,-0.049557
6,Humi_iron,-0.044614,-0.045977,-0.0
7,Humi_pare,-0.0398,-0.041367,-0.0
8,Temp_bath,-0.015657,-0.019853,-0.0
9,Temp_kitc,-0.003281,-0.018406,0.0


In [33]:
final_weights['regressor_Weight'].max()

0.5535465998386386

In [34]:
final_weights['regressor_Weight'].min()

-0.45669794833849997

#### Q.18

In [35]:
# Ridge method

ridge_reg= Ridge(alpha= 0.4)
ridge_reg.fit(x_train, y_train)
y_pred2= ridge_reg.predict(x_test)

In [36]:
# Root mean squared error on Ridge

rmse= np.sqrt(mean_squared_error(y_test, y_pred2))
round(rmse, 3)

0.088

#### Q20

In [None]:
# lasso method

lasso_reg= Lasso(alpha= 0.001)
lasso_reg.fit(x_train, y_train)
y_pred3= lasso_reg.predict(x_test)

In [40]:
# Root mean squared error on Lasso

rmse= np.sqrt(mean_squared_error(y_test, y_pred3))
round(rmse, 3)

0.094