In [None]:
'''
# Implementation of two interactive visualisations for the Home Sales Prices and Characteristics for Seattle and King County #

###
                                            ####  VISUALISATION WITH PLOTLY  #####

                                                ### MULTI AXIS LINE PLOT ###

                                    ### 2014-15 King County Real Estate Market Activity ###
###
###
###
                                            ####  VISUALISATION WITH BOKEH  #####

                                                ### ANIMATED BUBBLE PLOT ###

                                            ### Prices of Houses Built YEAR ###

###

#
'''

In [120]:
#General Imports
import pandas as pd
import numpy as np
import os
%pip install colorcet
import colorcet as cc
import matplotlib.pyplot as plt

Note: you may need to restart the kernel to use updated packages.


In [121]:
#Plotly Imports
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

In [122]:
#Bokeh Imports
from bokeh import plotting as bplt
from bokeh.io import output_notebook, show, output_file, push_notebook
from bokeh.models import NumeralTickFormatter, ColumnDataSource, HoverTool, CustomJS, Slider, Toggle
from bokeh.models.callbacks import CustomJS
from bokeh.layouts import row, column
from bokeh.models.widgets import Slider

In [124]:
#Read the dataset
# Load the King County house-sales CSV (looked up relative to the working
# directory) and preview its first rows as the cell output.
DATA_PATH = 'kc_house_data.csv'
df = pd.read_csv(DATA_PATH)
df.head(n=5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [127]:
#Print Ranges of House Prices
sale_prices = df['price']


def sold_in_band(lower, upper):
    """Count sales priced at or above `lower` and strictly below `upper`.

    Computed as (sales below `upper`) minus (sales below `lower`).
    """
    below_lower = (sale_prices < lower).sum()
    below_upper = (sale_prices < upper).sum()
    return below_upper - below_lower


# Ten $100k-wide bands covering $0 - $1m
for step in range(10):
    sold = sold_in_band(100000 * step, 100000 * (step + 1))
    print("Houses Sold for %s-%sk: %s" % (step * 100, (step + 1) * 100, sold))

# Seven $1m-wide bands covering $1m - $8m
for step in range(1, 8):
    sold = sold_in_band(1000000 * step, 1000000 * (step + 1))
    print("Houses Sold for %s-%sm: %s" % (step, step + 1, sold))

Houses Sold for 0-100k: 25
Houses Sold for 100-200k: 761
Houses Sold for 200-300k: 3651
Houses Sold for 300-400k: 4257
Houses Sold for 400-500k: 3714
Houses Sold for 500-600k: 2839
Houses Sold for 600-700k: 1954
Houses Sold for 700-800k: 1410
Houses Sold for 800-900k: 927
Houses Sold for 900-1000k: 583
Houses Sold for 1-2m: 1287
Houses Sold for 2-3m: 155
Houses Sold for 3-4m: 38
Houses Sold for 4-5m: 5
Houses Sold for 5-6m: 4
Houses Sold for 6-7m: 1
Houses Sold for 7-8m: 2


In [128]:
#Get Price Names
def price_label(thousands):
    """Format a price given in whole thousands of dollars as a short label.

    Values of 1000 and above (i.e. $1m or more) are shown in millions with
    one decimal, e.g. 1225 -> '$1.2m'; smaller values are shown in thousands,
    e.g. 222 -> '$222k'.
    """
    thousands = int(thousands)  # plain int so built-in round()/str() do the formatting
    if thousands >= 1000:
        return '$' + str(round(thousands / 1000, 1)) + 'm'
    return '$' + str(thousands) + 'k'


# Round every price to the nearest $1k, then label all rows in one pass.
# This avoids a per-row loop of chained lookups and `.loc` writes, and it does
# not rely on the frame having a default 0..n-1 index.
k = round(df['price']/1000,0).astype(int)
df['price_names'] = k.map(price_label)

In [129]:
#Get months
# Display labels for the 13 months covered by the data, and the matching
# 'YYYYMM' keys used to look them up.
months = ['May 14','Jun 14','Jul 14','Aug 14','Sep 14','Oct 14','Nov 14','Dec 14','Jan 15','Feb 15','Mar 15','Apr 15','May 15']
months_in_date_form = ['201405','201406','201407','201408','201409','201410','201411','201412','201501','201502','201503','201504','201505']
date = dict(zip(months_in_date_form,months))

# Build each sale's 'YYYYMM' key and translate it to its label in one
# vectorised pass. Going through pd.to_datetime accepts both the raw date
# strings and an already-converted datetime column, so this cell can be
# re-run after `df['date']` has been converted to datetimes.
month_keys = pd.to_datetime(df['date']).dt.strftime('%Y%m')
month_labels = month_keys.map(date)

# Fail loudly (as a plain dict lookup would) for sales outside the window.
unmatched = month_labels.isna()
if unmatched.any():
    raise KeyError("No month label for date key(s): %s"
                   % month_keys[unmatched].unique().tolist())

df['month'] = month_labels

In [130]:
#Get year built or renovated
# Houses with a recorded renovation (yr_renovated > 0) get the later of the
# build and renovation years; every other house keeps its build year.
# Done as one vectorised operation instead of a per-row `.loc` loop.
renovated = df['yr_renovated'] > 0
index = df[renovated].index.values  # row labels of the renovated houses
df['max_yr_built_or_renovated'] = df['yr_built'].mask(
    renovated, np.maximum(df['yr_built'], df['yr_renovated'])
)

In [131]:
####  VISUALISATION WITH PLOTLY  #####

### MULTI AXIS LINE PLOT ###

### 2014-15 King County Real Estate Market Activity ###

In [132]:
# Initialise plotly's offline notebook mode before the `iplot` call further down.
init_notebook_mode(connected=True)

In [133]:
#Initialize the variables that will be used for this plot

# Parse the sale dates once, outside any loop.
df['date'] = pd.to_datetime(df['date'])

# Mean sale price per month, truncated to whole dollars, in `months` order.
monthly_mean_prices = [
    int(np.mean(df.loc[df['month'] == month, 'price'])) for month in months
]

# Number of sales per month, in `months` order. Grouping by the full date
# gives one row per calendar day, which does not line up with the 13 monthly
# x-axis labels; the 'Transactions' trace needs exactly one count per month.
monthly_sales = df.groupby('month')['id'].count().reindex(months, fill_value=0)
df_date = pd.DataFrame({'month': months, 'id': monthly_sales.values})


In [134]:
# Two line traces over the same month axis: the transaction counts on the
# primary y-axis and the mean sale prices on a secondary y-axis ('y2').
trace1 = go.Scatter(x=months, y=df_date['id'], name='Transactions')
trace2 = go.Scatter(x=months, y=monthly_mean_prices, name='Mean Sale Prices', yaxis='y2')

# Both traces go into one figure
data = [trace1, trace2]

# Layout of the plot: title, primary axis label, and a secondary axis drawn
# on the right-hand side, overlaid on the primary one, with coloured text.
secondary_axis_colour = 'rgb(148, 103, 189)'
layout = go.Layout(
    title='2014-15 King County Real Estate Market Activity',
    yaxis={'title': 'Transactions'},
    yaxis2={
        'title': 'Monthly Mean Sale Prices',
        'titlefont': {'color': secondary_axis_colour},
        'tickfont': {'color': secondary_axis_colour},
        'overlaying': 'y',
        'side': 'right',
    },
)

# Build the figure and display it
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='multiple-axes-double')

In [135]:
####  VISUALISATION WITH BOKEH  #####

### ANIMATED BUBBLE PLOT ###

### Prices of Houses Built YEAR ###

In [136]:
# Configure Bokeh to render in the notebook; the `show(...)` call at the end
# of the notebook relies on this.
output_notebook()


In [138]:
# Shared look-and-feel for a plot
def style(p):
    """Apply the notebook's title, axis-label and tick-label fonts to `p`.

    `p` is modified in place and also returned.
    """
    # Centered 20pt serif title
    heading = p.title
    heading.align = 'center'
    heading.text_font_size = '20pt'
    heading.text_font = 'serif'

    # Both axes: bold 14pt axis titles and 12pt tick labels
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '14pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '12pt'

    return p

In [139]:
# Pin Bokeh to version 1.3.4.
# NOTE(review): bokeh is already imported earlier in this notebook, so a
# version installed here only takes effect after the kernel is restarted —
# consider moving this install above the imports.
%pip install 'bokeh==1.3.4'

Note: you may need to restart the kernel to use updated packages.


In [140]:
# Create variables necessary for the color and the size of the bubbles

# Price bands: ten $100k-wide bands up to $1m, then a single $1m-$8m band.
bin_edges = [100000*i for i in range(0,11)]+[8000000]
price_bands = pd.cut(df['price'], bin_edges)
df['binned2'] = price_bands.astype(str).tolist()

# Band labels in ascending price order, e.g. '(0, 100000]'. They are taken
# from the cut itself so they always match the strings stored in 'binned2'.
bins = [str(band) for band in price_bands.cat.categories]

# One colour per band, sampled from colorcet's rainbow map at indices that
# decrease as the price band increases.
palette = [cc.rainbow[(len(bins)-1-i)*15*int(17/(len(bins)))] for i in range(len(bins))]
col_conv = dict(zip(bins,palette))

min_price = min(df['price'])
max_price = max(df['price'])

# Bubble sizes grow by 5 per band, starting at 10 for the cheapest band.
sizes = [5*i+5 for i in range(1,len(bins)+1)]
size_conv = dict(zip(bins,sizes))

# Look up colour and size for every row in one vectorised pass instead of a
# per-row `.loc` loop.
row_colors = df['binned2'].map(col_conv)
row_sizes = df['binned2'].map(size_conv)

# A price outside (0, 8000000] falls in no band; fail loudly (as a plain dict
# lookup would) rather than silently producing bubbles with no colour/size.
out_of_range = row_colors.isna()
if out_of_range.any():
    raise KeyError("No price band for binned value(s): %s"
                   % df.loc[out_of_range, 'binned2'].unique().tolist())

df['color'] = row_colors
df['size'] = row_sizes

In [141]:
# Data behind the bubble plot: price, grade, year, and the per-row colour,
# size and price label, restricted to rows with yr_renovated > 0 and grade < 33.
scatter_columns = ['price','grade','max_yr_built_or_renovated','color', 'size', 'price_names']
rows_to_plot = (df['yr_renovated']>0) & (df['grade']<33)
scatter_df = df.loc[rows_to_plot, scatter_columns]

In [142]:
# Create the bubble plot and assign the x and y variables, titles, labels as well as variables range
# Year shown when the plot is first drawn (the slider below starts at the same value).
initial_year = 2010
temp_df = scatter_df[scatter_df['max_yr_built_or_renovated']==initial_year]
source = ColumnDataSource(temp_df)

# The title uses the same "Built in <year>" wording that update_scatter()
# writes, so the heading does not change form after the first update.
# Axis ranges are fixed from the whole of scatter_df (with a 5% margin) so
# they stay constant while the displayed year changes.
p = bplt.figure(plot_width=900, plot_height=350,
                title="Prices of Houses Built in " + str(initial_year),
                x_axis_label='Grade', y_axis_label="Price",
                y_range=(0, max(scatter_df['price'])*1.05),
                x_range=(min(scatter_df['grade'])*0.95, max(scatter_df['grade'])*1.05))
p.scatter(x='grade', y='price', color='color', size='size', source=source, alpha=0.8)
style(p)

#hover tool to display information (price and grade) when hovering over a specific point in the plot
h = HoverTool(tooltips=[('Price', '@price_names'),('Grade', '@grade')])
p.add_tools(h)

# Browser-side callback for the year slider: asks the notebook kernel to run
# update_scatter(<slider value>) so the plot is refreshed from Python.
# `cmd` is declared with `var` so it does not leak into the page's global
# scope, and the guard also covers front-ends where the `IPython` object
# does not exist (the callback then does nothing instead of throwing).
callback_scatter = CustomJS(code="""
if (typeof IPython !== 'undefined' && IPython.notebook && IPython.notebook.kernel) {
    var kernel = IPython.notebook.kernel;
    var cmd = "update_scatter(" + cb_obj.value + ")";
    kernel.execute(cmd, {}, {});
}
""")

# Redraw the bubble plot for a given year (called by the slider and animation callbacks)
def update_scatter(year):
    """Show only the rows of `scatter_df` whose year equals `year`.

    Replaces the data of the plot's `source`, rewrites the plot title and
    pushes the change to the notebook output identified by `scatter_handle`.
    """
    rows_for_year = scatter_df.loc[scatter_df['max_yr_built_or_renovated'] == year]
    source.data = ColumnDataSource(rows_for_year).data
    p.title.text = "Prices of Houses Built in " + str(year)
    push_notebook(handle=scatter_handle)

# Year slider: spans the smallest to the largest year present in scatter_df,
# moves in steps of one year, starts at 2010, and runs callback_scatter
# whenever its value changes.
plotted_years = scatter_df['max_yr_built_or_renovated']
year_select = Slider(
    title='Year',
    start=min(plotted_years),
    end=max(plotted_years),
    value=2010,
    step=1,
    callback=callback_scatter,
)

In [143]:
# Browser-side callback for the play/stop toggle. While the toggle is active,
# a timer fires every 250 ms and asks the kernel to run update_scatter(j) for
# j = 1965, 1966, ..., 2015, then wraps around to 1965 again.
#  - The year is sent first and incremented afterwards, so the animation
#    really starts at 1965.
#  - The timer id is stored on `window` explicitly: it has to survive until
#    the later "stop" click, which is a separate invocation of this callback.
#  - Any previous timer is cleared before a new one starts, so timers cannot
#    stack up.
#  - Ticks are skipped when no notebook kernel is reachable.
# NOTE(review): the 1965-2015 range is hard-coded here and is not derived
# from the slider's start/end values.
callback_animate = CustomJS(code="""
        var f = cb_obj.active;
        var j = 1965;

        clearInterval(window.mytimer);
        if (f == true) {
            window.mytimer = setInterval(replace_data, 250);
        }

        function replace_data() {
             if (typeof IPython === 'undefined' || !IPython.notebook || !IPython.notebook.kernel) {
                 return;
             }
             var kernel = IPython.notebook.kernel;
             var cmd = "update_scatter(" + j + ")";
             kernel.execute(cmd, {}, {});
             j++;
             if (j > 2015) {
                 j = 1965;
             }
        }
""")

# Play/stop button: starts inactive and runs callback_animate each time it is toggled
btn = Toggle(
    label="Play/Stop Animation (from 1965)",
    button_type="success",
    active=False,
    callback=callback_animate,
)

# Stack button, slider and plot vertically and display them; the returned
# handle is what update_scatter() passes to push_notebook().
animation_layout = column(btn, year_select, p)
scatter_handle = show(animation_layout, notebook_handle=True)