## Machine Learning: Regression - Predicting Energy Efficiency of Buildings
By: Volker Felvic Katche Tachin

29th November 2022

Note: some answers may differ from the ones I chose at the time of the quiz, because these answers were reworked after I got some of the quiz questions wrong.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset from its CSV file (the path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="energydata_complete.csv")

In [None]:
# Print a concise summary of the loaded frame: column names, dtypes and non-null counts.
df.info()

In [4]:
# Fit a one-feature linear model: column T2 is the predictor, column T6 is the target.
lm = LinearRegression()
lm.fit(X=df.loc[:, ["T2"]], y=df.loc[:, ["T6"]])

In [5]:
# R^2 of the T2 -> T6 relationship, evaluated on the same rows the model was fitted on
t6_fitted = lm.predict(df[["T2"]])
round(r2_score(df[["T6"]], t6_fitted), 2)

0.64

In [3]:
# Remove the `date` and `lights` columns before modelling.
df_new = df.drop(columns=["date", "lights"])

# Min-max scale every remaining column, i.e. the features AND the `Appliances` target.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set minima/maxima leak into the scaling, and every metric below
# is expressed in scaled units of `Appliances`. Left unchanged so the recorded
# outputs stay reproducible; in a real pipeline fit the scaler on X_train only.
scaler = MinMaxScaler()
df_new = pd.DataFrame(data=scaler.fit_transform(df_new), columns=df_new.columns, index=df_new.index)

# set features and target
X = df_new.drop(columns=["Appliances"])
y = df_new.loc[:, "Appliances"]

# 70/30 split with a fixed seed so the metrics are repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

lm = LinearRegression()
lm.fit(X_train, y_train)

predictions = lm.predict(X_test)
print("mae:", round(mean_absolute_error(y_test, predictions), 2))
print("rss:", round(np.sum(np.square(y_test - predictions)), 2))
# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and later removed.
print("rmse:", round(np.sqrt(mean_squared_error(y_test, predictions)), 3))
print("r2:", round(r2_score(y_test, predictions), 2))

mae: 0.05
rss: 45.35
rmse: 0.088
r2: 0.15


In [13]:
# One row per input feature holding the fitted linear-regression coefficient,
# displayed from the most negative weight to the most positive.
weights = pd.DataFrame({"weights": lm.coef_}, index=lm.feature_names_in_)
weights.sort_values("weights")

Unnamed: 0,weights
RH_2,-0.456698
T_out,-0.32186
T2,-0.236178
T9,-0.189941
RH_8,-0.157595
RH_out,-0.077671
RH_7,-0.044614
RH_9,-0.0398
T5,-0.015657
T1,-0.003281


In [14]:
# Ridge regression on the same train/test split, with alpha = 0.4.
ridge = Ridge(alpha=0.4)
ridge.fit(X_train, y_train)

predictions = ridge.predict(X_test)
# sqrt(MSE) replaces `squared=False`, which was deprecated in scikit-learn 1.4
# and later removed; the value is the same on every version.
print("rmse:", np.sqrt(mean_squared_error(y_test, predictions)))

rmse: 0.08753385704628003


In [15]:
# Lasso regression on the same train/test split, with alpha = 0.001.
lasso = Lasso(alpha=0.001)
lasso.fit(X_train, y_train)

preds = lasso.predict(X_test)
# sqrt(MSE) replaces `squared=False`, which was deprecated in scikit-learn 1.4
# and later removed; the value is the same on every version.
print("rmse:", round(np.sqrt(mean_squared_error(y_test, preds)), 3))
# Show the fitted coefficients (in the column order of X_train).
lasso.coef_

rmse: 0.094


array([ 0.        ,  0.01787993,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.00011004, -0.        , -0.        ,  0.        , -0.        ,
       -0.04955749,  0.00291176,  0.        ,  0.        , -0.        ,
       -0.        ])