In [35]:
#importing libraries
import pandas as pd
import numpy as np
import seaborn as sns

# Load the energy dataset from the UCI archive.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'
df = pd.read_csv(DATA_URL)

# 'date' and 'lights' are left out of the modelling frame.
dataset = df.drop(columns=['date', 'lights'])

# Feature scaling: bring every remaining column onto a common scale with the
# min-max scaler.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split below, so test rows influence the scaling -- confirm this is intended.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(dataset)
normalised_df = pd.DataFrame(scaled_values, columns=dataset.columns)

# 'Appliances' is the target; every other scaled column is a predictor.
target_variable = normalised_df['Appliances']
features_df = normalised_df.drop(columns='Appliances')

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    features_df,
    target_variable,
    test_size=0.3,
    random_state=42,
)

# Fit a linear regression on every feature of the training split and score it
# on the held-out test split.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

linear_model = LinearRegression()
# fit the model to the training dataset
linear_model.fit(x_train, y_train)
# obtain predictions
predicted_values = linear_model.predict(x_test)

# MAE
mae = mean_absolute_error(y_test, predicted_values)
print("MAE: {}".format(round(mae, 2))) #prints 0.05

# RSS (residual sum of squares)
rss = np.sum(np.square(y_test - predicted_values))
print("RSS: {}".format(round(rss, 2))) #prints 45.35

# RMSE
rmse = np.sqrt(mean_squared_error(y_test, predicted_values))
print("RMSE: {}".format(round(rmse, 3))) #prints 0.088

# R_Squared
# The score gets its own name: binding it to `r2_score` would replace the
# imported function with a float and make every later `r2_score(...)` call
# fail with "'float' object is not callable".
r_squared = r2_score(y_test, predicted_values)
print("R_Squared: {}".format(round(r_squared, 2))) #prints 0.15

## Question 17
def get_weights_df(model, feat, col_name):
  """Return the weight of every feature as a two-column DataFrame.

  Parameters
  ----------
  model : object exposing a ``coef_`` attribute
      Its ``coef_`` must hold one weight per column of ``feat``.
  feat : pandas.DataFrame
      Frame whose column names label the weights.
  col_name : str
      Name given to the weight column of the result.

  Returns
  -------
  pandas.DataFrame
      Columns ``'Features'`` and ``col_name``, sorted by ascending weight,
      with the weights rounded to 3 decimal places.
  """
  weights = pd.Series(model.coef_, feat.columns).sort_values()
  weights_df = pd.DataFrame(weights).reset_index()
  weights_df.columns = ['Features', col_name]
  # Series.round returns a new Series rather than modifying in place, so the
  # rounded values have to be assigned back to take effect.
  weights_df[col_name] = weights_df[col_name].round(3)
  return weights_df

# Question 17: weight table for the plain linear model.
linear_model_weights = get_weights_df(linear_model, x_train, 'Linear_Model_Weight')

from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_squared_error

## Question 18 (Ridge Regression)
ridge_reg = Ridge(alpha=0.4)
ridge_reg.fit(x_train, y_train)
ridge_predicted_values = ridge_reg.predict(x_test)

# RMSE (Ridge): square root of the test-set mean squared error.
ridge_mse = mean_squared_error(y_test, ridge_predicted_values)
rmse_ridge = np.sqrt(ridge_mse)
print("RMSE_Ridge: {}".format(round(rmse_ridge, 3))) #prints 0.088

## Question 19
lasso_reg = Lasso(alpha=0.001)
lasso_fit = lasso_reg.fit(x_train, y_train)
# Count the coefficients that are not exactly zero.
nonzero_weight_mask = lasso_fit.coef_ != 0
print("Number of weight features used: {}".format(np.sum(nonzero_weight_mask)))

## Question 20
lasso_predicted_values = lasso_reg.predict(x_test)

# RMSE (Lasso): same metric as above, on the lasso predictions.
lasso_mse = mean_squared_error(y_test, lasso_predicted_values)
rmse_lasso = np.sqrt(lasso_mse)
print("RMSE_Lasso: {}".format(round(rmse_lasso, 3))) #prints 0.094

MAE: 0.05
RSS: 45.35
RMSE: 0.088
R_Squared: 0.15
RMSE_Ridge: 0.088
Number of weight features used: 4
RMSE_Lasso: 0.094


### Question 12

In [47]:
# Question 12: single-feature linear regression between two scaled columns,
# T2 (named "living room temperature" here) as the predictor and T6 (named
# "outside temperature" here) as the response.
# Everything this cell calls is imported here so it does not depend on what
# earlier cells left bound in the kernel.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# New model features
living_room_temp_df = normalised_df['T2']
outside_temp = normalised_df['T6']

# Split dataset into train and test set.
# The predictor passed in is the series defined just above; the name used
# previously (`living_room_temp`) is never defined in this notebook and raised
# NameError on a fresh kernel.
x_new_train, x_new_test, y_new_train, y_new_test = train_test_split(
    living_room_temp_df, outside_temp, test_size=0.3, random_state=42)

# The estimator needs a 2-D feature matrix, so each 1-D series is reshaped
# into a single column.
x_new_train = np.array(x_new_train).reshape(-1, 1)
y_new_train = np.array(y_new_train).reshape(-1, 1)
x_new_test = np.array(x_new_test).reshape(-1, 1)
y_new_test = np.array(y_new_test).reshape(-1, 1)

# Fit on a dedicated estimator instead of refitting the shared `linear_model`,
# so the multi-feature model and its coefficients stay intact.
simple_linear_model = LinearRegression()
simple_linear_model.fit(x_new_train, y_new_train)
# obtain predictions
new_predicted_values = simple_linear_model.predict(x_new_test)

# R_Squared
new_r2_score = r2_score(y_new_test, new_predicted_values)
print("R_Squared: {}".format(round(new_r2_score, 2)))

R_Squared: 0.64
