# NYC Building Oil Consumption for Year 2017
1. Data Inspection and Preparation
2. Number of buildings with retiring boilers over the years (line and dots)
3. Estimated high and low BTU consumption by building area (two charts)

In [1]:
import pandas as pd

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.io import show, output_notebook
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.layouts import Row, Column
from bokeh.models import BoxSelectTool

# Configure Bokeh to render plots inline in the notebook output cells
# (must run before any show() call in later cells)
output_notebook()

## 1. Data Inspection and Preparation
- View data, print columns, inspect data types
- Assign columns needed to ``boiler`` for boiler retirement plot
- Assign columns needed to ``btu`` for plots on BTU consumption by building area

In [2]:
# Read the oil-consumption workbook (path is relative to the notebook's
# working directory) into a DataFrame, then preview its top five rows
data = pd.read_excel(io='NYCOilConsumption-2018.xlsx')
data.head(n=5)

Unnamed: 0,BBL_id,FacilityAddress,Zipcode,Latitude,Longitude,Borough,CommunityBoard,CensusTract,BIN,NTA,...,ComplianceDate,BuildingOwnership,BuildingType,CityCouncilDistrict,BuildingArea,NumOfBuildings,NumOfFloors,NumOfResidentialUnits,NumberOfTotalUnits,YearConstructed
0,1003230001,70 BARUCH DRIVE,10002,40.71727,-73.978272,Manhattan,3.0,1002.0,1078032.0,Lower East Side ...,...,,,Elevator Apartments,2,2948300,20,14,2391,2391,1962
1,2042050001,1400 PELHAM PKWY&amp;EASTCHESTER RD,10461,40.846961,-73.840633,Bronx,,,,,...,2015.0,,Elevator Apartments,13,2265343,11,12,0,4,1957
2,1009950033,124 43 STREET,10036,40.756164,-73.985213,Manhattan,,,,,...,2015.0,,Walk-Up Apartments,3,2245112,1,55,0,1,2005
3,1016040006,1450 MADISON AVENUE,10029,40.789679,-73.952412,Manhattan,11.0,168.0,1083922.0,East Harlem South ...,...,2014.0,,Elevator Apartments,8,2215000,9,14,0,1,1963
4,1016040006,1450 MADISON AVENUE,10029,40.789679,-73.952412,Manhattan,11.0,168.0,1083922.0,East Harlem South ...,...,2014.0,,Walk-Up Apartments,8,2215000,9,14,0,1,1963


In [4]:
# Print the full column index so exact column names can be copied into later cells
print(data.columns)

# Inspect the variable data types; info() also reports the row count and
# the non-null count for each column
data.info()

Index(['BBL_id', 'FacilityAddress', 'Zipcode', 'Latitude', 'Longitude',
       'Borough', 'CommunityBoard', 'CensusTract', 'BIN', 'NTA',
       'NaturalGasUtilityCompany', 'BoilerModel', 'NumIdenticalBoilers',
       'BoilerCapacity_GrossBTU', 'BoilerInstallationDate',
       'BoilerRetirement_dateEstimated', 'DuelFuelBoiler?', 'BoilerAgeRange',
       'BurnerModel', 'PrimaryFuel', 'TotalConsumption_HighEstimateMMBTUs',
       'TotalConsumption_LowEstimateMMBTUs', 'ComplyWithGreenerBuildingsLaws',
       'ComplianceDate', 'BuildingOwnership', 'BuildingType',
       'CityCouncilDistrict', 'BuildingArea', 'NumOfBuildings', 'NumOfFloors',
       'NumOfResidentialUnits', 'NumberOfTotalUnits', 'YearConstructed'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8045 entries, 0 to 8044
Data columns (total 33 columns):
BBL_id                                 8045 non-null int64
FacilityAddress                        8045 non-null object
Zipcode                           

In [11]:
# Tally rows per estimated boiler-retirement year.
# Note: count() counts non-null BBL_id values, so a BBL_id that appears on
# several rows contributes once per row to that year's total.
boiler = (
    data[['BBL_id', 'BoilerRetirement_dateEstimated']]
    .rename(columns={'BoilerRetirement_dateEstimated': 'Year',
                     'BBL_id': 'Number of Buildings'})
    .groupby('Year')
    .count()
    .reset_index()  # turn the 'Year' group key back into a regular column
)
boiler.head()

Unnamed: 0,Year,Number of Buildings
0,2010,711
1,2011,71
2,2012,92
3,2013,140
4,2014,137


In [19]:
# Select the building-area, consumption-estimate and descriptive columns used
# for the BTU (heat consumption) plots, and shorten the long column names in
# the same chained step
btu = (
    data.loc[:, ['BuildingArea',
                 'TotalConsumption_HighEstimateMMBTUs',
                 'TotalConsumption_LowEstimateMMBTUs',
                 'FacilityAddress',
                 'ComplyWithGreenerBuildingsLaws',
                 'BuildingType',
                 'NTA',
                 'PrimaryFuel']]
    .rename(columns={
        'TotalConsumption_HighEstimateMMBTUs': 'HighEstimate',
        'TotalConsumption_LowEstimateMMBTUs': 'LowEstimate',
        'ComplyWithGreenerBuildingsLaws': 'GreenBuilding',
    })
)
btu.head()

Unnamed: 0,BuildingArea,HighEstimate,LowEstimate,FacilityAddress,GreenBuilding,BuildingType,NTA,PrimaryFuel
0,2948300,39420,27594,70 BARUCH DRIVE,No,Elevator Apartments,Lower East Side ...,6 (Dirty Oil)
1,2265343,0,0,1400 PELHAM PKWY&amp;EASTCHESTER RD,Yes,Elevator Apartments,,6 (Dirty Oil)
2,2245112,3625,2538,124 43 STREET,Yes,Walk-Up Apartments,,4 (Clean Oil)
3,2215000,0,0,1450 MADISON AVENUE,Yes,Elevator Apartments,East Harlem South ...,6 (Dirty Oil)
4,2215000,176847,123793,1450 MADISON AVENUE,Yes,Walk-Up Apartments,East Harlem South ...,6 (Dirty Oil)


## 2. Retiring Boilers Over Years
- Use ``boiler`` to plot the number of buildings that will have retired boilers over the years
- Create a ``boilerFigure`` and add line and circle glyphs
- Make sure dots and lines have at least the following properties specified as needed: size, color, nonselection_alpha, nonselection_color, and selection_color
- X axis should be labeled as '<strong>Years</strong>' and Y axis should be labeled as '<strong>Number of Buildings</strong>'