In [985]:
import pandas as pd
import bqplot
import ipywidgets
import numpy as np

In [986]:
# Per-column values that read_csv should treat as missing (NaN):
# a 0 in any of these three columns is read as "not recorded".
zero_as_missing = {
    'Square Footage': 0,
    'Year Acquired': 0,
    'Year Constructed': 0,
}
buildings = pd.read_csv('building_inventory.csv', na_values=zero_as_missing)
buildings.head()

Unnamed: 0,Agency Name,Location Name,Address,City,Zip code,County,Congress Dist,Congressional Full Name,Rep Dist,Rep Full Name,...,Bldg Status,Year Acquired,Year Constructed,Square Footage,Total Floors,Floors Above Grade,Floors Below Grade,Usage Description,Usage Description 2,Usage Description 3
0,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,1975.0,1975.0,144.0,1,1,0,Unusual,Unusual,Not provided
1,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004.0,2004.0,144.0,1,1,0,Unusual,Unusual,Not provided
2,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004.0,2004.0,144.0,1,1,0,Unusual,Unusual,Not provided
3,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004.0,2004.0,144.0,1,1,0,Unusual,Unusual,Not provided
4,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004.0,2004.0,144.0,1,1,0,Unusual,Unusual,Not provided


### Heat Map

**Left component:**

Rows are congressional district

Columns are the governmental department (Agency Name) (note, the agency names might overlap with your plot -- there are ways around this or you can leave as is)

Values are sum of total square footage for that set of criteria

In [987]:
# Pivot the inventory for the heat map: one row per congressional district,
# one column per agency, each cell holding the summed square footage of that
# (district, agency) pair. Pairs with no buildings show up as NaN.
# The aggregation is named by string ('sum') rather than passing np.sum:
# recent pandas versions warn when a NumPy callable is passed as aggfunc.
buildingHeat = pd.pivot_table(buildings,
                              index='Congress Dist',
                              columns='Agency Name',
                              values='Square Footage',
                              aggfunc='sum')
buildingHeat

Agency Name,Appellate Court / Fifth District,Appellate Court / Fourth District,Appellate Court / Second District,Appellate Court / Third District,Chicago State University,Department of Agriculture,Department of Central Management Services,Department of Corrections,Department of Human Services,Department of Juvenile Justice,...,Illinois Emergency Management Agency,Illinois Medical District Commission,Illinois State University,Northeastern Illinois University,Northern Illinois University,Office of the Attorney General,Office of the Secretary of State,Southern Illinois University,University of Illinois,Western Illinois University
Congress Dist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,231278.0,,372784.0,,...,5650.0,31200.0,,,41315.0,,,,,
1,,,,,1219492.0,,,,449547.0,,...,,,,,,,,,,
2,,,,,,,,49572.0,1253943.0,,...,,,,,,,,,3643049.0,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,9932.0,,362890.0,,...,,,,1110103.0,,,28452.0,,,
6,,,,,,,,,,72411.0,...,,,,,,,,,,
7,,,,,,,2088840.0,,304039.0,,...,,15000.0,,,,,56904.0,,6363904.0,
8,,,43330.0,,,,65268.0,,913263.0,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [988]:
# Total square footage per congressional district. Only the numeric column is
# selected: summing the text column 'Agency Name' is meaningless (and in newer
# pandas would concatenate the strings). Double brackets keep the result a
# one-column DataFrame.
colors = buildings.groupby('Congress Dist')[['Square Footage']].sum()

# Row and column labels for the heat map. Both are taken from the pivot table
# so that label k always describes row/column k of buildingHeat.values;
# buildings['Agency Name'].unique() is in first-appearance order and does not
# line up with the pivot's columns.
rows = buildingHeat.index
columns = buildingHeat.columns

In [989]:
# Display the per-district totals held in `colors`.
colors

Unnamed: 0_level_0,Square Footage
Congress Dist,Unnamed: 1_level_1
0,793124.0
1,2351762.0
2,6285684.0
3,1293687.0
4,317741.0
5,1809022.0
6,145659.0
7,9211651.0
8,1461548.0
9,77978.0


In [990]:
#agency = buildingHeat.columns.levels[1].tolist()
#agency

In [991]:
# Dimensions of `colors` as (rows, columns).
colors.shape

(19, 1)

In [992]:
# (1) Scales: x/y, colors
col_sc = bqplot.ColorScale(scheme = "Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# (2) Axis: x/y, colors
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc,label = 'Agency Name')
y_ax = bqplot.Axis(scale = y_sc,label = 'Congressional District',
                   orientation = 'vertical')

# (3) Marks: heatmap
# Row and column labels are read directly from the pivot table so they are in
# the same order as the colour matrix (buildingHeat.values).
# Totals that are not strictly positive are masked to NaN before taking the
# log, so a zero total cannot turn into -inf in the colour data.
heat_map = bqplot.GridHeatMap(color = np.log10(buildingHeat.where(buildingHeat > 0).values),
                              row = buildingHeat.index,
                              column = buildingHeat.columns,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'}, # make interactive on click of each box
                              anchor_style = {'fill':'blue'}, # to make our selection blue
                              selected_style = {'opacity': 1.0}, # make 100% opaque if box is selected
                              unselected_style = {'opacity': 0.8}) # make a little see-through if not

# (4) Link selection on heatmap to other things in later section

# (5) Paint heatmap canvas, don't display yet:
fig_heat = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])
#fig_heat

### Line plot

**Right component:**

x is the year

y is total square footage acquired that year

### Heatmap + line plot

In [993]:
# Text label that starts out empty; the heat-map selection callback later
# writes the selected district/agency description into its `value`.
mySelectedLabel = ipywidgets.Label()

In [994]:
# (1) Scales: ordinal x/y for the category labels, red colour ramp for values
col_sc = bqplot.ColorScale(scheme = "Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# (2) Axis: x/y, colors
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc,label = 'Agency Name')
y_ax = bqplot.Axis(scale = y_sc,label = 'Congressional District',
                   orientation = 'vertical')

# (3) Marks: heatmap
# Colour is log10 of the summed square footage per (district, agency) cell.
# Totals that are not strictly positive are masked to NaN before taking the
# log, so a zero total cannot turn into -inf in the colour data.
heat_map = bqplot.GridHeatMap(color = np.log10(buildingHeat.where(buildingHeat > 0).values),
                              row = buildingHeat.index,
                              column = buildingHeat.columns,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'}, # make interactive on click of each box
                              anchor_style = {'fill':'blue'}, # to make our selection blue
                              selected_style = {'opacity': 1.0}, # make 100% opaque if box is selected
                              unselected_style = {'opacity': 0.8}) # make a little see-through if not


In [995]:
# (1) Scales: both axes of the line plot are linear
x_scl = bqplot.LinearScale()
y_scl = bqplot.LinearScale()

# (2) Axes: year along the bottom, summed square footage up the left side
ax_xcl = bqplot.Axis(scale = x_scl, label = 'Year Acquired')
ax_ycl = bqplot.Axis(scale = y_scl,
                     label = 'Total Square Footage',
                     orientation = 'vertical',
                     side = 'left')

# Totals by construction year over the whole inventory; these two values are
# not passed to the line mark below.
rowx = buildings['Year Constructed'].unique()
columny = buildings.groupby('Year Constructed')['Square Footage'].sum()

# (3) Mark: line plot, seeded from one fixed heat-map cell (row 12, column 10)
i, j = 12, 10
distselect = buildingHeat.index[i]
agenselect = buildingHeat.columns[j]

# Keep only the buildings of that district/agency pair, then total the
# square footage per acquisition year.
mask = buildings['Congress Dist'].eq(distselect) & buildings['Agency Name'].eq(agenselect)
b = buildings.loc[mask].groupby('Year Acquired')['Square Footage'].sum()

line = bqplot.Lines(x = b.index,
                    y = b,
                    scales = {'x': x_scl, 'y': y_scl})



In [996]:
def get_data_value(change):
    """Refresh the label and the line plot when one heat-map cell is selected.

    Parameters
    ----------
    change : dict
        Change notification delivered by ``observe``. ``change['owner']`` is
        the observed mark; its ``selected`` attribute is read here.

    Nothing happens unless exactly one cell is selected. A cleared selection
    (``selected`` is ``None``) is ignored instead of raising on ``len(None)``.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) != 1:
        return

    i, j = selected[0]  # (row, column) position of the selected cell
    distselect = buildingHeat.index[i]
    agenselect = buildingHeat.columns[j]
    mySelectedLabel.value = 'Total Square Footage in Congressional District '+ str(distselect)+','+str(agenselect)

    # Square footage per acquisition year for the selected district/agency pair.
    mask = (buildings['Congress Dist'] == distselect) & (buildings['Agency Name'] == agenselect)
    b = buildings[mask].groupby('Year Acquired')['Square Footage'].sum()

    # Send x and y to the front end together, so the line is never drawn with
    # the new x values against the previous selection's y values.
    with line.hold_sync():
        line.x = b.index
        line.y = b

heat_map.observe(get_data_value, 'selected')

In [997]:
# Spot check: the agency name stored at column position 1 of the pivot table,
# i.e. what `buildingHeat.columns[j]` yields for j = 1.
buildingHeat.columns[1]

'Appellate Court / Fourth District'

In [998]:
# One figure per mark: the heat map with its colour/row/column axes,
# and the line plot with its x/y axes.
fig_heat = bqplot.Figure(axes = [c_ax, y_ax, x_ax],
                         marks = [heat_map])
fig_line = bqplot.Figure(axes = [ax_xcl, ax_ycl],
                         marks = [line])

In [999]:
# Same minimum width for both figures.
for fig in (fig_heat, fig_line):
    fig.layout.min_width = '500px'

# Layout: the label on top, the two figures side by side underneath.
figures = ipywidgets.HBox(children = [fig_heat, fig_line])
myDashboard = ipywidgets.VBox(children = [mySelectedLabel, figures])
myDashboard

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

### Write up

**1. Data transformation and rescaling.**

    I loaded the csv file into a dataframe called buildings. To plot the heatmap, I took the three relevant columns (Agency Name, Congress Dist and Square Footage) and arranged them in a pivot table with index = Congress Dist, columns = Agency Name and values = total square footage (the square footage is summed within each district/agency pair). The pivot table's index, columns and values were then passed to the heatmap. For the corresponding line plot, I take the coordinates of the cell selected in the heatmap, subset the data to that district and agency, and sum the square footage by year acquired. I then plot a line plot from that subset.
    
**2. NaN values.**

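    Zeros in Square Footage, Year Acquired and Year Constructed are treated as missing values: they are converted to NaN when the csv file is loaded, so they do not contribute to the summed totals.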
    
**3. Aesthetic choices:**

    Colors: I have chosen the red schemes for heatmap just to contrast with missing values and blue color in line plot. 
    Layout: I have displayed heatmap and line plot in the same line horizontally so that the interactions between them could be easily seen. 
    Plot size: I have chosen the size of 500px as width for both graphs so that spaces are evenly distributed and more spread out for aesthetic concerns. 
    Labels: Labels have been updated for both axes and sizes. 
    I have also printed out the x and y coordinates when a cell in the heatmap is selected (i.e. which congressional district and which agency the values belong to).