In [1]:
from IPython.display import HTML

# Render a small HTML/JavaScript snippet as this cell's output.
# - `code_toggle()` hides every `div.input` element when `code_show` is true,
#   shows them otherwise, and then flips `code_show`.
# - It is registered to run once when the document is ready, so the code
#   inputs are hidden on load (code_show starts out true).
# - The form's submit button calls `code_toggle()` again on each click.
# NOTE(review): relies on `$` (presumably jQuery supplied by the notebook
# front end) being available on the page -- confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="CLICK HERE - Toggle code ON/OFF."></form>''')

#LOAD PACKAGES

**HOW TO USE APP <br>
STEP 1: CLICK 'CELL' IN MENU BAR <br>
STEP 2: CLICK 'RUN ALL'**

In [9]:
from IPython.display import HTML

#LOAD PACKAGES
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import matplotlib.ticker as ticker
from lxml import html
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook, show, output_file
from bokeh.palettes import Spectral6
from bokeh.models import NumeralTickFormatter, Legend
from bokeh.models import (GMapPlot, GMapOptions, ColumnDataSource,
                         Circle, Range1d, PanTool, WheelZoomTool,
                         BoxSelectTool)
from ipywidgets import *
import time
import datetime as dt
from datetime import datetime

# Housing Bubble Detector
## *House Prices in North America at a Glance*
The average price of homes in North American cities has increased substantially in recent years. The circle glyphs in the map below depict the average price of homes in 2018.

In [10]:
# Draw one circle per city on a Google Maps base layer, sized by the city's
# average home price (column 'p_avg' of cities.csv).
cities = pd.read_csv('cities.csv')
# Rows without a latitude cannot be positioned on the map, so drop them.
cities = cities[cities['lat'].notna()]
map_options = GMapOptions(lat=35.5020794, lng=-97.9912878,
                          map_type='roadmap', zoom=4)

# The Google Maps API key is a credential and must not be committed with the
# notebook. Read it from the environment instead; any key that was previously
# hard-coded here should be revoked and replaced.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to a valid "
        "Google Maps API key before running this cell.")

plot = GMapPlot(x_range=Range1d(),
                y_range=Range1d(),
                map_options=map_options,
                api_key=api_key)
plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

# Circle size is proportional to how far a city's average price sits above
# the baseline. The baseline is 1.0 below the cheapest city so that even the
# cheapest city gets a strictly positive size.
PRICE_PER_SIZE_UNIT = 20000  # price difference that maps to one unit of glyph size
baseline = cities['p_avg'].min() - 1.0
source = ColumnDataSource(data=dict(
    lat=cities['lat'].tolist(),
    lon=cities['long'].tolist(),
    rad=((cities['p_avg'] - baseline) / PRICE_PER_SIZE_UNIT).tolist()))

circle = Circle(x='lon',
                y='lat',
                size='rad',
                fill_color='red',
                fill_alpha=0.5)
plot.add_glyph(source, circle)

# Write the map to a standalone HTML file and open/display it.
output_file('Bubble_Plot.html')
show(plot)

## *Rising Prices*
In Toronto, for example, the average price grew from about \$465,000 in 2013 to about \$761,000 in 2017 (+64 per cent). Such increases over relatively short periods have led many to conclude that the markets are experiencing a 'bubble', which would soon burst and lead to a rapid price decline.
- *Use the dropdown menu to select your city and see how average prices have grown.*

In [5]:
# Average-price table: a 'Date' column plus one column per city name.
p_avg = pd.read_csv('average_prices.csv')

# Per-city distribution tables. Only the first three columns of each are
# used below (see D1).
aus = pd.read_csv('aus.xy.csv')
sj = pd.read_csv('sj.xy.csv')
buf = pd.read_csv('buf.xy.csv')
dal = pd.read_csv('dal.xy.csv')
la = pd.read_csv('la.xy.csv')
sf = pd.read_csv('sf.xy.csv')
frs = pd.read_csv('frs.xy.csv')
prt = pd.read_csv('prt.xy.csv')
slc = pd.read_csv('slc.xy.csv')
chrl = pd.read_csv('chr.xy.csv')
to = pd.read_csv('to.xy.csv')

# cty_list and p_dist are parallel: entry k of one describes entry k of the
# other, so keep their order in sync when adding a city.
cty_list = [
    'Austin, TX', 'San Jose, CA', 'Buffalo, NY', 'Dallas, TX',
    'Los Angeles, CA', 'San Francisco, CA', 'Fresno, CA', 'Portland, OR',
    'Salt Lake City, UT', 'Charleston, SC', 'Toronto, ON',
]
p_dist = [aus, sj, buf, dal, la, sf, frs, prt, slc, chrl, to]

# D: city name -> frame with the shared 'Date' column and that city's
# average-price column (renamed to 'Avg Price').
D = {
    city: pd.DataFrame({'Date': p_avg['Date'],
                        'Avg Price': p_avg[city]})
    for city in cty_list
}

# D1: city name -> frame holding the first three columns of the city's
# distribution table under fixed names ('x.data', 'y.data', 'y.norm').
D1 = {
    city: pd.DataFrame({'x.data': table.iloc[:, 0],
                        'y.data': table.iloc[:, 1],
                        'y.norm': table.iloc[:, 2]})
    for city, table in zip(cty_list, p_dist)
}

In [6]:
# Direct Bokeh output to the notebook for the show() calls that follow
# (an earlier cell configured file output via output_file).
output_notebook()
def plotfn(City='Toronto, ON'):
    """Show a line chart of the average house price over time for one city.

    Parameters
    ----------
    City : str
        Key into the module-level ``D`` dict; the selected frame must have a
        'Date' column of 'YYYY-MM-DD' strings and an 'Avg Price' column.

    The chart is displayed with ``show``; nothing is returned.
    """
    dat = D[City]
    x = [datetime.strptime(i, '%Y-%m-%d') for i in dat['Date']]
    # Prices are plotted in thousands, matching the y-axis label.
    y = dat['Avg Price'] / 1000
    # The title follows the selected city; it used to say "Toronto" for
    # every selection.
    plot = figure(tools="pan",
                  title="Average House Prices in " + City + " (2010-18)",
                  x_axis_label='Year',
                  x_axis_type='datetime',
                  y_axis_label='Average Price (x$1,000)',
                  plot_width=550, plot_height=350)

    plot.title.text_font_size = '14pt'
    plot.xaxis.axis_label_text_font_size = '12pt'
    plot.xaxis.axis_label_text_font_style = 'bold'
    plot.xaxis.major_label_text_font_size = '12pt'
    plot.yaxis[0].formatter = NumeralTickFormatter(format='1,000')
    plot.yaxis.axis_label_text_font_size = '12pt'
    plot.yaxis.axis_label_text_font_style = 'bold'
    plot.yaxis.major_label_text_font_size = '12pt'
    plot.line(x, y, line_width=3, line_color="red")

    show(plot)
    
# Dropdown widget: re-runs plotfn with the chosen city each time the
# selection changes. The first entry is the initial selection's list order.
interact(
    plotfn,
    City=[
        'Toronto, ON',
        'San Francisco, CA',
        'Dallas, TX',
        'Austin, TX',
        'Salt Lake City, UT',
        'Los Angeles, CA',
        'Fresno, CA',
        'Buffalo, NY',
        'Charleston, SC',
        'Portland, OR',
        'San Jose, CA',
    ],
)

<function __main__.plotfn(City='Toronto, ON')>

## *Average Prices Can't Detect Bubbles*
However, the trend in average prices alone is a relatively crude and imprecise indicator of housing bubbles. Average price trends cannot distinguish a bubble from non-bubble scenarios, such as a price 'adjustment' following an undervalued market prior to 2013.
## *Existing Research*
Researchers at the University of Tokyo (2011) demonstrated a robust approach for detecting bubbles, using the distribution of prices. Under normal market conditions, house prices in the Greater Tokyo Area followed a log-normal distribution, similar to other international jurisdictions where prices were stable. However, an analysis of historic data showed that, during the bubble era, the distribution had a distinct fat upper (right) tail. Their method, which analyzes the entire cross-sectional price distribution rather than changes in the mean alone, can be used to robustly determine whether North American cities are also experiencing a bubble.

## *A Better Way to Detect Bubbles*
This app calculates the quality-adjusted distribution of prices by controlling for key exogenous variables affecting a house's price, such as its size (sqft), the number of bedrooms, and the number of bathrooms. Specifically, the following model estimates the coefficient values for the key determinants of house prices.

$$ListPrice = b_{0} + b_{1}*SQFT + b_{2}*BedNum + b_{3}*BathNum$$ 

We use the coefficients to control for the effect of these variables using the following calculations: 
    1. We first calculate the average of each variable from our 
    sample of houses:  
$$AvgSQFT$$
<br>
    2. Then we determine the adjustment factors for each variable 
    and each house:
$$AdjSQFT = (AvgSQFT - SQFT_{i})*b_{1}$$
<br>
    3. Finally, we derive the adjusted house prices by summing 
    the adjustment factors with the actual listed price for each
    house:
$$AdjPrice = ActualListedPrice + AdjSQFT + AdjBedNum + AdjBathNum$$
<br>
After performing these calculations, the app then plots the distribution of adjusted prices against the normal distribution for comparison. This information is useful for prospective home buyers, investors, and developers alike! The researchers in Tokyo used real estate listings records from a local publication. Similarly, this app uses web-scraping to draw over 50,000 listings records from 11 major North American cities!<br>
<br>
Data for Toronto was scraped from a local real estate listing website: www.zolo.ca. Special care had to be taken to ensure that the IP address would not be blocked. This was achieved using a random delay of 1 to 5 seconds after scanning through each website. For US cities, the process had fewer complications, as a popular website (www.zillow.com) not only covered the entire country, but also had a policy of openly allowing users to scrape their information. 
- *Use the dropdown menu to select your city.*

In [7]:
# Direct Bokeh output to the notebook for the show() call inside plotfn1.
output_notebook()
def plotfn1(City='Toronto, ON'):
    """Show a city's price-distribution curve alongside its 'y.norm' curve.

    Parameters
    ----------
    City : str
        Key into the module-level ``D1`` dict; the selected frame supplies
        the 'x.data', 'y.data' and 'y.norm' columns.

    Both curves share the same x values. The chart is displayed with
    ``show``; nothing is returned.
    """
    table = D1[City]
    scale = 1000  # every plotted column is divided by this factor
    prices = table['x.data'] / scale
    observed = table['y.data'] / scale
    reference = table['y.norm'] / scale

    fig = figure(
        tools="pan",
        title="Distribution of House Prices in " + City,
        x_axis_label='Prices (x$1,000)',
        y_axis_label='Density',
        plot_width=700,
        plot_height=400,
    )

    # Shared text styling for the title and both axes.
    fig.title.text_font_size = '14pt'
    for axis in (fig.xaxis, fig.yaxis):
        axis.axis_label_text_font_size = '12pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '12pt'
    fig.xaxis[0].formatter = NumeralTickFormatter(format='1,000')

    # Solid red: the selected city's curve; dashed blue: the 'y.norm' curve.
    fig.line(prices, observed, legend=City, line_width=3, line_color="red")
    fig.line(prices, reference, legend='N(μ,σ2)', line_width=3,
             line_color="blue", line_dash="4 4")

    # An additional Legend annotation is attached at an explicit location.
    fig.add_layout(Legend(location=(10, 30)))

    show(fig)
    
# Dropdown widget: re-runs plotfn1 with the chosen city each time the
# selection changes.
interact(
    plotfn1,
    City=[
        'Toronto, ON',
        'San Francisco, CA',
        'Dallas, TX',
        'Austin, TX',
        'Salt Lake City, UT',
        'Los Angeles, CA',
        'Fresno, CA',
        'Buffalo, NY',
        'Charleston, SC',
        'Portland, OR',
        'San Jose, CA',
    ],
)

<function __main__.plotfn1(City='Toronto, ON')>

## *Results*
The app shows that Toronto likely is not experiencing a bubble, since the price distribution (solid line) is not skewed to the right, and is in fact slightly skewed to the left compared to the normal distribution (dotted line). This suggests that Toronto is currently undervalued and house prices may continue to rise indefinitely. Cities in California, however, such as San Francisco and Fresno, likely are in a bubble, as indicated by their right-skewed distributions.