In [6]:
# Title:     SmartHome Dataset
# Authors:   Aline J
# Date:      01/06/2020
# Goal:      Analyze and predict energy use efficiency 
#            in a smarthouse during a year under variable weather conditions.
# Questions: 
# 1) What are the variables that influence energy efficiency? 
# 2) When is ... (TODO: this question was left unfinished - complete it)
### Begins Here ####
# 1. Import dependencies
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from scipy import stats
import string as string
import warnings
warnings.filterwarnings('ignore')

In [15]:
# 2. Import the raw data file into data frames
# The CSV is parsed a single time; the previous version read the same file
# twice (once for `my_data`, once for `df`). `parse_dates=True` was dropped
# because, without an index column, it only asks pandas to parse the default
# integer index and therefore changed nothing.
csv_path = "Data/HomeC.csv"
my_data = pd.read_csv(csv_path)

# Numeric-only columns (object/string columns such as `time` are excluded);
# this frame is re-indexed by timestamp in a later cell.
home_dat = my_data.select_dtypes(exclude=['object'])

# Independent working copy for the cleaning steps, so `my_data` stays untouched.
df = my_data.copy()

nRow, nCol = my_data.shape
print(f'There are {nRow} rows and {nCol} columns')


There are 503911 rows and 32 columns


In [16]:
# Attach a synthetic timestamp index to the numeric frame.
# The number of periods is taken from the frame itself rather than a
# hard-coded row count, so the cell keeps working if the CSV grows or shrinks.
# `pd.date_range` already returns a DatetimeIndex, so no extra wrapping is needed.
# NOTE(review): the raw `time` column advances by 1 per row (epoch seconds),
# while this index assumes one row per minute - confirm the sampling interval.
time_index = pd.date_range('2016-01-01 05:00', periods=len(home_dat), freq='min')
home_dat = home_dat.set_index(time_index)
# Check that a dataset has been uploaded into my_data

In [17]:
# Preview the first five rows (the default for `head`) of the raw frame.
my_data.head()

Unnamed: 0,time,use [kW],gen [kW],House overall [kW],Dishwasher [kW],Furnace 1 [kW],Furnace 2 [kW],Home office [kW],Fridge [kW],Wine cellar [kW],...,visibility,summary,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability
0,1451624400,0.932833,0.003483,0.932833,3.3e-05,0.0207,0.061917,0.442633,0.12415,0.006983,...,10.0,Clear,29.26,1016.91,9.18,cloudCover,282.0,0.0,24.4,0.0
1,1451624401,0.934333,0.003467,0.934333,0.0,0.020717,0.063817,0.444067,0.124,0.006983,...,10.0,Clear,29.26,1016.91,9.18,cloudCover,282.0,0.0,24.4,0.0
2,1451624402,0.931817,0.003467,0.931817,1.7e-05,0.0207,0.062317,0.446067,0.123533,0.006983,...,10.0,Clear,29.26,1016.91,9.18,cloudCover,282.0,0.0,24.4,0.0
3,1451624403,1.02205,0.003483,1.02205,1.7e-05,0.1069,0.068517,0.446583,0.123133,0.006983,...,10.0,Clear,29.26,1016.91,9.18,cloudCover,282.0,0.0,24.4,0.0
4,1451624404,1.1394,0.003467,1.1394,0.000133,0.236933,0.063983,0.446533,0.12285,0.00685,...,10.0,Clear,29.26,1016.91,9.18,cloudCover,282.0,0.0,24.4,0.0


In [18]:
# Power columns (kW) selected for the energy subset.
energy_columns = [
    'gen [kW]', 'House overall [kW]', 'Dishwasher [kW]',
    'Furnace 1 [kW]', 'Furnace 2 [kW]', 'Home office [kW]', 'Fridge [kW]',
    'Wine cellar [kW]', 'Garage door [kW]', 'Kitchen 12 [kW]',
    'Kitchen 14 [kW]', 'Kitchen 38 [kW]', 'Barn [kW]', 'Well [kW]',
    'Microwave [kW]', 'Living room [kW]', 'Solar [kW]',
]

# `filter(items=...)` keeps the listed labels that are present in the frame.
energy_data = home_dat.filter(items=energy_columns)
energy_data.head(10)


Unnamed: 0,gen [kW],House overall [kW],Dishwasher [kW],Furnace 1 [kW],Furnace 2 [kW],Home office [kW],Fridge [kW],Wine cellar [kW],Garage door [kW],Kitchen 12 [kW],Kitchen 14 [kW],Kitchen 38 [kW],Barn [kW],Well [kW],Microwave [kW],Living room [kW],Solar [kW]
2016-01-01 05:00:00,0.003483,0.932833,3.3e-05,0.0207,0.061917,0.442633,0.12415,0.006983,0.013083,0.000417,0.00015,0.0,0.03135,0.001017,0.004067,0.001517,0.003483
2016-01-01 05:01:00,0.003467,0.934333,0.0,0.020717,0.063817,0.444067,0.124,0.006983,0.013117,0.000417,0.00015,0.0,0.0315,0.001017,0.004067,0.00165,0.003467
2016-01-01 05:02:00,0.003467,0.931817,1.7e-05,0.0207,0.062317,0.446067,0.123533,0.006983,0.013083,0.000433,0.000167,1.7e-05,0.031517,0.001,0.004067,0.00165,0.003467
2016-01-01 05:03:00,0.003483,1.02205,1.7e-05,0.1069,0.068517,0.446583,0.123133,0.006983,0.013,0.000433,0.000217,0.0,0.0315,0.001017,0.004067,0.001617,0.003483
2016-01-01 05:04:00,0.003467,1.1394,0.000133,0.236933,0.063983,0.446533,0.12285,0.00685,0.012783,0.00045,0.000333,0.0,0.0315,0.001017,0.004067,0.001583,0.003467
2016-01-01 05:05:00,0.003433,1.391867,0.000283,0.50325,0.063667,0.447033,0.1223,0.006717,0.012433,0.000483,0.000567,0.0,0.03145,0.001017,0.004067,0.001583,0.003433
2016-01-01 05:06:00,0.00345,1.366217,0.000283,0.4994,0.063717,0.443267,0.12205,0.006733,0.012417,0.000517,0.00055,0.0,0.03155,0.001033,0.004117,0.001533,0.00345
2016-01-01 05:07:00,0.003417,1.4319,0.00025,0.477867,0.178633,0.444283,0.1218,0.006783,0.01255,0.000483,0.00045,0.0,0.031733,0.001033,0.0042,0.00155,0.003417
2016-01-01 05:08:00,0.003417,1.6273,0.000183,0.44765,0.3657,0.441467,0.121617,0.00695,0.012717,0.000467,0.0003,1.7e-05,0.031767,0.001017,0.0042,0.001567,0.003417
2016-01-01 05:09:00,0.003417,1.735383,1.7e-05,0.17155,0.6825,0.438733,0.121633,0.007233,0.01335,0.000367,5e-05,0.0,0.031667,0.001017,0.0042,0.001617,0.003417


In [19]:
# Numeric weather columns selected for the weather subset.
weather_columns = [
    'temperature',
    'humidity', 'visibility', 'apparentTemperature', 'pressure',
    'windSpeed', 'windBearing', 'dewPoint',
]

# `filter(items=...)` keeps the listed labels that are present in the frame.
weather_data = home_dat.filter(items=weather_columns)
# Inspect the end of the series.
weather_data.tail()

Unnamed: 0,temperature,humidity,visibility,apparentTemperature,pressure,windSpeed,windBearing,dewPoint
2016-12-16 03:26:00,35.12,0.86,8.74,29.45,1011.49,6.72,186.0,31.27
2016-12-16 03:27:00,35.12,0.86,8.74,29.45,1011.49,6.72,186.0,31.27
2016-12-16 03:28:00,35.12,0.86,8.74,29.45,1011.49,6.72,186.0,31.27
2016-12-16 03:29:00,35.12,0.86,8.74,29.45,1011.49,6.72,186.0,31.27
2016-12-16 03:30:00,,,,,,,,


In [20]:
# Print the index range plus each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503911 entries, 0 to 503910
Data columns (total 32 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   time                 503911 non-null  object 
 1   use [kW]             503910 non-null  float64
 2   gen [kW]             503910 non-null  float64
 3   House overall [kW]   503910 non-null  float64
 4   Dishwasher [kW]      503910 non-null  float64
 5   Furnace 1 [kW]       503910 non-null  float64
 6   Furnace 2 [kW]       503910 non-null  float64
 7   Home office [kW]     503910 non-null  float64
 8   Fridge [kW]          503910 non-null  float64
 9   Wine cellar [kW]     503910 non-null  float64
 10  Garage door [kW]     503910 non-null  float64
 11  Kitchen 12 [kW]      503910 non-null  float64
 12  Kitchen 14 [kW]      503910 non-null  float64
 13  Kitchen 38 [kW]      503910 non-null  float64
 14  Barn [kW]            503910 non-null  float64
 15  Well [kW]        

In [21]:
# Print a two-column header, an underline of the same width, then the dtypes.
tmp_str = "Feature(attribute)     DataType"
print(tmp_str, "-" * len(tmp_str), sep="\n")
print(df.dtypes)

Feature(attribute)     DataType
-------------------------------
time                    object
use [kW]               float64
gen [kW]               float64
House overall [kW]     float64
Dishwasher [kW]        float64
Furnace 1 [kW]         float64
Furnace 2 [kW]         float64
Home office [kW]       float64
Fridge [kW]            float64
Wine cellar [kW]       float64
Garage door [kW]       float64
Kitchen 12 [kW]        float64
Kitchen 14 [kW]        float64
Kitchen 38 [kW]        float64
Barn [kW]              float64
Well [kW]              float64
Microwave [kW]         float64
Living room [kW]       float64
Solar [kW]             float64
temperature            float64
icon                    object
humidity               float64
visibility             float64
summary                 object
apparentTemperature    float64
pressure               float64
windSpeed              float64
cloudCover              object
windBearing            float64
precipIntensity        float64
dewPoi

In [22]:
# Inspect the last ten rows to check how the file ends.
df.tail(10)

Unnamed: 0,time,use [kW],gen [kW],House overall [kW],Dishwasher [kW],Furnace 1 [kW],Furnace 2 [kW],Home office [kW],Fridge [kW],Wine cellar [kW],...,visibility,summary,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability
503901,1452128301,1.537383,0.003183,1.537383,0.000133,0.021683,0.642733,0.042033,0.005283,0.008333,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503902,1452128302,1.551817,0.0032,1.551817,5e-05,0.0562,0.624783,0.04175,0.00525,0.00845,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503903,1452128303,1.599617,0.003217,1.599617,6.7e-05,0.089217,0.63865,0.04175,0.005617,0.008467,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503904,1452128304,1.608867,0.003217,1.608867,3.3e-05,0.1143,0.623283,0.041817,0.005217,0.00835,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503905,1452128305,1.601233,0.003183,1.601233,5e-05,0.085267,0.642417,0.041783,0.005267,0.008667,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503906,1452128306,1.599333,0.003233,1.599333,5e-05,0.104017,0.625033,0.04175,0.005233,0.008433,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503907,1452128307,1.924267,0.003217,1.924267,3.3e-05,0.422383,0.637733,0.042033,0.004983,0.008467,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503908,1452128308,1.9782,0.003217,1.9782,5e-05,0.495667,0.620367,0.0421,0.005333,0.008233,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503909,1452128309,1.99095,0.003233,1.99095,5e-05,0.4947,0.634133,0.0421,0.004917,0.008133,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503910,\,,,,,,,,,,...,,,,,,,,,,


In [23]:
# Drop the malformed trailing record by content instead of by position.
# The last CSV row holds only a stray '\' in `time` and no measurements, so it
# is selected as "every column other than `time` is missing".
# The previous `df[0:-1]` removed one more valid row each time the cell was
# re-run; this version is idempotent. `.copy()` gives later cells an
# independent frame to add columns to, rather than a slice of the original.
value_columns = [col for col in df.columns if col != 'time']
df = df.dropna(subset=value_columns, how='all').copy()
df.tail()

Unnamed: 0,time,use [kW],gen [kW],House overall [kW],Dishwasher [kW],Furnace 1 [kW],Furnace 2 [kW],Home office [kW],Fridge [kW],Wine cellar [kW],...,visibility,summary,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability
503905,1452128305,1.601233,0.003183,1.601233,5e-05,0.085267,0.642417,0.041783,0.005267,0.008667,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503906,1452128306,1.599333,0.003233,1.599333,5e-05,0.104017,0.625033,0.04175,0.005233,0.008433,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503907,1452128307,1.924267,0.003217,1.924267,3.3e-05,0.422383,0.637733,0.042033,0.004983,0.008467,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503908,1452128308,1.9782,0.003217,1.9782,5e-05,0.495667,0.620367,0.0421,0.005333,0.008233,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51
503909,1452128309,1.99095,0.003233,1.99095,5e-05,0.4947,0.634133,0.0421,0.004917,0.008133,...,8.74,Light Rain,29.45,1011.49,6.72,0.31,186.0,0.0101,31.27,0.51


In [24]:
# List the current column labels before they are renamed.
df.columns

Index(['time', 'use [kW]', 'gen [kW]', 'House overall [kW]', 'Dishwasher [kW]',
       'Furnace 1 [kW]', 'Furnace 2 [kW]', 'Home office [kW]', 'Fridge [kW]',
       'Wine cellar [kW]', 'Garage door [kW]', 'Kitchen 12 [kW]',
       'Kitchen 14 [kW]', 'Kitchen 38 [kW]', 'Barn [kW]', 'Well [kW]',
       'Microwave [kW]', 'Living room [kW]', 'Solar [kW]', 'temperature',
       'icon', 'humidity', 'visibility', 'summary', 'apparentTemperature',
       'pressure', 'windSpeed', 'cloudCover', 'windBearing', 'precipIntensity',
       'dewPoint', 'precipProbability'],
      dtype='object')

In [25]:
# Strip the literal ' [kW]' unit suffix from every column label.
# regex=False so the square brackets are matched as plain text.
df.columns = df.columns.str.replace(' [kW]', '', regex=False)
df.columns

Index(['time', 'use', 'gen', 'House overall', 'Dishwasher', 'Furnace 1',
       'Furnace 2', 'Home office', 'Fridge', 'Wine cellar', 'Garage door',
       'Kitchen 12', 'Kitchen 14', 'Kitchen 38', 'Barn', 'Well', 'Microwave',
       'Living room', 'Solar', 'temperature', 'icon', 'humidity', 'visibility',
       'summary', 'apparentTemperature', 'pressure', 'windSpeed', 'cloudCover',
       'windBearing', 'precipIntensity', 'dewPoint', 'precipProbability'],
      dtype='object')

In [26]:
# Row-wise aggregates: total of the two furnace columns and mean of the
# three kitchen columns.
furnace_cols = ['Furnace 1', 'Furnace 2']
kitchen_cols = ['Kitchen 12', 'Kitchen 14', 'Kitchen 38']

df['sum_Furnace'] = df[furnace_cols].sum(axis='columns')
df['avg_Kitchen'] = df[kitchen_cols].mean(axis='columns')
df.columns

Index(['time', 'use', 'gen', 'House overall', 'Dishwasher', 'Furnace 1',
       'Furnace 2', 'Home office', 'Fridge', 'Wine cellar', 'Garage door',
       'Kitchen 12', 'Kitchen 14', 'Kitchen 38', 'Barn', 'Well', 'Microwave',
       'Living room', 'Solar', 'temperature', 'icon', 'humidity', 'visibility',
       'summary', 'apparentTemperature', 'pressure', 'windSpeed', 'cloudCover',
       'windBearing', 'precipIntensity', 'dewPoint', 'precipProbability',
       'sum_Furnace', 'avg_Kitchen'],
      dtype='object')