# Setting up the Data

## Imports

In [1]:
# python standard library
import os
from datetime import datetime

# pypi
import numpy
import pandas

# local
from common import (
    download_data,
    Paths,
    Urls,
    )

## Cleaning up the data

### Portland

In [2]:
# Load the Portland-area unemployment table with the helper from the local ``common`` module.
# NOTE(review): presumably downloads from Urls.portland and caches at Paths.portland -- confirm in common.py
portland = download_data(Paths.portland, Urls.portland)

In [3]:
# friendlier names for the raw column headers (the same mapping is applied to the national data)
column_renames = dict(Value="unemployment_rate", Label="date")
portland.rename(inplace=True, columns=column_renames)
# echo the columns to check that the renaming took effect
portland.columns

Index(['Series ID', 'Year', 'Period', 'date', 'unemployment_rate'], dtype='object')

In [4]:
# translate the period codes ("M01" through "M12") to month abbreviations
month_map = {
    "M{:02d}".format(number): name
    for number, name in enumerate(
        "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(), start=1)
}
# a period code that is not in the map raises a KeyError instead of being skipped
portland["month"] = portland.Period.apply(lambda period: month_map[period])

In [5]:
# peek at the first rows to check the renamed columns and the new ``month`` column
portland.head()

Unnamed: 0,Series ID,Year,Period,date,unemployment_rate,month
0,LAUMT413890000000003,2007,M01,2007 Jan,5.4,Jan
1,LAUMT413890000000003,2007,M02,2007 Feb,5.5,Feb
2,LAUMT413890000000003,2007,M03,2007 Mar,5.3,Mar
3,LAUMT413890000000003,2007,M04,2007 Apr,5.0,Apr
4,LAUMT413890000000003,2007,M05,2007 May,4.7,May


In [6]:
# month abbreviation -> month number (Jan is 1, Dec is 12)
month_integers = {
    name: number
    for number, name in enumerate(
        "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(), start=1)
}

In [7]:
# build a datetime for the first day of each row's month
portland["datetime"] = portland.apply(
    lambda record: datetime(record.Year,
                            month_integers[record.month],
                            1),
    axis=1,
)

### National

In [8]:
# Load the national unemployment table with the same helper used for the Portland data.
# NOTE(review): presumably downloads from Urls.national and caches at Paths.national -- confirm in common.py
national = download_data(Paths.national, Urls.national)

In [9]:
# peek at the raw national columns before they are renamed
national.head()

Unnamed: 0,Series ID,Year,Period,Label,Value
0,LNU04000000,2007,M01,2007 Jan,5.0
1,LNU04000000,2007,M02,2007 Feb,4.9
2,LNU04000000,2007,M03,2007 Mar,4.5
3,LNU04000000,2007,M04,2007 Apr,4.3
4,LNU04000000,2007,M05,2007 May,4.3


In [10]:
# give the national data the same column names and month abbreviations as the Portland data
national.rename(inplace=True, columns=column_renames)
national["month"] = national.Period.apply(lambda period: month_map[period])

In [11]:
# Remove the row labelled 122 from the national data (in place).
# NOTE(review): presumably the national series has one more month than the Portland series -- confirm
# Running this cell a second time raises a KeyError because the label is already gone.
national.drop([122], inplace=True)

In [12]:
# build a datetime for the first day of each row's month
national["datetime"] = national.apply(
    lambda record: datetime(record.Year,
                            month_integers[record.month],
                            1),
    axis=1,
)

### S & P Index

In [13]:
# Load the S & P 500 data with the helper from the local ``common`` module.
# NOTE(review): ``na_values="."`` is presumably forwarded to the pandas reader so "." entries become NaN -- confirm in common.py
s_and_p_index = download_data(Paths.s_and_p, Urls.s_and_p, na_values=".")

In [14]:
# Pad the front of the index with empty rows for January through March 2007
# so that it has one row for every month in the Portland data.
pre = pandas.DataFrame({"DATE": ["2007-01-01", "2007-02-01", "2007-03-01"],
                        "VALUE": [numpy.nan] * 3})
# ``DataFrame.append`` was removed in pandas 2.0; ``concat`` does the same job
# and ``ignore_index`` renumbers the rows from zero in the same step.
s_and_p_index = pandas.concat([pre, s_and_p_index], ignore_index=True)
# Re-use the Portland labels (e.g. "2007 Jan"); this needs both frames to have
# the same number of rows, so re-running this cell without re-loading the data
# will fail (the padding would be added a second time).
s_and_p_index["date"] = portland.date.values
s_and_p_index.head()

Unnamed: 0,DATE,VALUE,date
0,2007-01-01,,2007 Jan
1,2007-02-01,,2007 Feb
2,2007-03-01,,2007 Mar
3,2007-04-01,,2007 Apr
4,2007-05-01,1511.14,2007 May


Since I added date-times to the unemployment data, I think I need to add them to all the other data too.

In [15]:
# the labels look like "2007 Jan": the first token is the year, the second the month abbreviation
s_and_p_index["year"] = s_and_p_index.date.apply(
    lambda label: int(label.split()[0]))
s_and_p_index["month"] = s_and_p_index.date.apply(
    lambda label: label.split()[1])

In [16]:
# build a datetime for the first day of each row's month
s_and_p_index["datetime"] = s_and_p_index.apply(
    lambda record: datetime(record.year,
                            month_integers[record.month],
                            1),
    axis=1,
)

### House Price Index

In [17]:
# Load the House Price Index with the helper from the local ``common`` module.
# NOTE(review): presumably downloads from Urls.house_price and caches at Paths.house_price -- confirm in common.py
house_price_index = download_data(Paths.house_price, Urls.house_price)

In [18]:
# copy the series to a column with a friendlier name
house_price_index["price"] = house_price_index.HPIPONM226S
# The labels are taken from the second Portland row onward, which needs the
# index to be exactly one row shorter than the Portland data -- re-running
# this cell without re-loading the data will fail on the length mismatch.
house_price_index["date"] = portland.date[1:].values
# an empty row for January 2007 so the index lines up with the other data
pre = pandas.DataFrame({"DATE": ["2007-01-01"],
                        "HPIPONM226S": [numpy.nan],
                        "price": [numpy.nan],
                        "date": ["2007 Jan"]})
# ``DataFrame.append`` was removed in pandas 2.0; ``concat`` does the same job
# and ``ignore_index`` renumbers the rows from zero in the same step.
house_price_index = pandas.concat([pre, house_price_index], ignore_index=True)

In [19]:
# the labels look like "2007 Jan": the first token is the year, the second the month abbreviation
house_price_index["year"] = house_price_index.date.apply(
    lambda label: int(label.split()[0]))
house_price_index["month"] = house_price_index.date.apply(
    lambda label: label.split()[1])
# build a datetime for the first day of each row's month
house_price_index["datetime"] = house_price_index.apply(
    lambda record: datetime(record.year,
                            month_integers[record.month],
                            1),
    axis=1,
)

# Getting significant points

## Portland

In [20]:
highest_unemployment = portland.unemployment_rate.max()
# positions of every row that reaches the maximum (there can be more than one)
unemployment_peaks = numpy.flatnonzero(
    (portland.unemployment_rate == highest_unemployment).values)

In [21]:
# the lowest Portland unemployment rate in the data
lowest_unemployment = portland.unemployment_rate.min()

In [22]:
# row positions of the months used as the start and end of the recession
recession_start = numpy.flatnonzero((portland.date == "2007 Dec").values)[0]
recession_end = numpy.flatnonzero((portland.date == "2009 Jun").values)[0]
# the Portland unemployment rate when the recession began
portland_recession_start = portland.unemployment_rate.iloc[recession_start]

In [23]:
# show the row position found for the start of the recession
recession_start

11

## National

In [24]:
peak = national.unemployment_rate.max()
# kept as the tuple of index-arrays, so the first peak is ``national_peak[0][0]``
national_peak = numpy.nonzero((national.unemployment_rate == peak).values)
# ``recession_start`` is the row position found in the Portland data,
# so this relies on the two frames having their months in the same rows
national_recession_start = national.unemployment_rate.iloc[recession_start]
post_recession = national.loc[national.Year > 2009]
# first month after 2009 whose rate exactly equals the rate at the start of the recession
index = numpy.flatnonzero(
    (post_recession.unemployment_rate == national_recession_start).values)[0]
post_recession.date.iloc[index]

'2015 Oct'

## S & P Index

In [25]:
# row positions where the index is at its lowest value
s_and_p_nadir = numpy.flatnonzero(
    (s_and_p_index.VALUE == s_and_p_index.VALUE.min()).values)

## House Price Index

In [26]:
# row positions where the house price index is at its lowest value
housing_nadir = numpy.flatnonzero(
    (house_price_index.price == house_price_index.price.min()).values)

# Plotting

## Some Constants

In [27]:
# one (color, label) pair per data-set that gets plotted
NATIONAL_COLOR, NATIONAL_LABEL = "slategrey", "National"
PORTLAND_COLOR, PORTLAND_LABEL = "cornflowerblue", "Portland-Hillsboro-Vancouver"
S_AND_P_COLOR, S_AND_P_LABEL = "#90151B", "S & P 500 Index"
HOUSING_COLOR, HOUSING_LABEL = "#D89159", "House Price Index"

This is how you get it done with bokeh. The ColumnDataSource works sort of like pandas plotting, you pass it in as the data source and then give the names of the columns as strings. Actually, I wonder if you can just use the DataFrame directly... No, I just tried it and it raised an exception.

First, the import and set-up. Since I'm going to set values in the hover-tool (otherwise it just gives the current cursor coordinates, not the data values), I have to import all the classes that represent tools - like HoverTool, ResetTool, etc.

In [28]:
from bokeh.io import (
    output_file,
    output_notebook,
    show,
    )

from bokeh.plotting import (
    figure,
    ColumnDataSource,
    )

from bokeh.models import (
    CrosshairTool,
    HoverTool,
    PanTool,
    ResetTool,
    ResizeTool,
    SaveTool,
    UndoTool,
    WheelZoomTool,
    )

In [30]:
# have bokeh render its output inline in the notebook
output_notebook()

The *ColumnDataSource* looks like a pandas *DataFrame* (I think). The keys in the data dictionary are how you tell bokeh what you want to use.

In [31]:
# First version of the Portland data-source; the dictionary keys are the
# column names that bokeh refers to.
# NOTE(review): no later cell in this notebook appears to use ``source`` (``portland_source`` is used instead) -- confirm before removing
source = ColumnDataSource(
    data={
        "Month": portland.datetime,
        "% Unemployment": portland.unemployment_rate,
        "month":portland.date,
    }
)

The HoverTool lets you set what gets shown in the popup. The first item in each tuple (e.g. "month") is used as the label, and the second says what value to show. A name prefixed with "@" (e.g. "@month") refers to a column in the *ColumnDataSource* that you give it, while names prefixed with "$" (e.g. "$x" and "$y") are special fields such as the cursor's position in data coordinates, so "$y" shows where the mouse is on the y-axis rather than the value stored in the column.

In [32]:
# First attempt at the hover tool: each tuple is (label, value to show).
# ``hover`` is re-defined in the next code cell, which replaces this one.
hover = HoverTool(tooltips=[
    ("month", "@month"),
    ("unemployment", "$y"),
])

In [34]:
# the keys are the column names that the plots and the hover tool refer to
portland_source = ColumnDataSource(data={
    "month_data": portland.datetime,
    "unemployment": portland.unemployment_rate,
    "month_label": portland.date,
})
# each tooltip is (label, column to show), with "@" marking a column in the source
hover = HoverTool(
    tooltips=[("month", "@month_label"),
              ("unemployment", "@unemployment")],
)

## Portland and National

First thing to do is make a *ColumnDataSource* for the national data.

In [35]:
# same column names as the Portland source so the plotting code works for both
national_source = ColumnDataSource(data={
    "month_data": national.datetime,
    "unemployment": national.unemployment_rate,
    "month_label": national.date,
})

I don't know why but the hover tool isn't re-usable (well, I'm guessing none of them are), so the tools have to get re-created.

In [36]:
def plot_data(fig, source, color, legend=None, x="month_data", y="unemployment"):
    """Draws the source on the figure as a line with circle markers

    Args:
     fig : figure instance to draw on
     source: data-source holding the columns
     color (str): color for the line and the outline of the circles
     legend (str): how to label the line
     x (str): name of the column in the source for the x-axis
     y (str): name of the column in the source for the y-axis
    """
    fig.line(x, y, source=source, line_color=color, legend=legend)
    # the markers are outlined in the line's color and filled with white
    fig.circle(x, y, source=source, line_color=color, fill_color="white")
    return

# The Final Plot

This is a re-do of the final plot with the unemployment, housing, and S & P index data. First the two indices need to be changed to bokeh columns.

In [37]:
from bokeh.models import (
    BoxAnnotation,
    CustomJS,
    Span,
    Toggle,
    )

In [38]:
# bokeh version of the S & P data; the keys are the column names the plot uses
s_and_p_source = ColumnDataSource(data={
    "month_data": s_and_p_index.datetime,
    "value": s_and_p_index.VALUE,
    "month_label": s_and_p_index.date,
})

In [39]:
# bokeh version of the house-price data; the keys are the column names the plot uses
housing_source = ColumnDataSource(
    data=dict(
        month_data=house_price_index.datetime,
        price=house_price_index.price,
        # the labels come from the housing frame itself, not from the S & P
        # frame, so every column of this source is taken from the same data
        month_label=house_price_index.date,
        )
)

In order to get the plots aligned, you give them to the ``column`` function.

In [40]:
from bokeh.layouts import column

In [41]:
# every figure is given the same size so they line up when stacked
FIGURE_WIDTH, FIGURE_HEIGHT = 600, 300

First I'll create the unemployment figures.

In [42]:
def make_tools(tooltips=None):
    """makes the tools for the figures

    New tool objects are created on every call so each figure gets its own set.

    Args:
     tooltips (list): (label, value) tuples for the hover tool. If not given
      the tooltips for the unemployment data are used.

    Returns:
     list: tool objects (the hover tool comes first)
    """
    if tooltips is None:
        # default to the columns of the unemployment data-sources
        tooltips = [
            ("month", "@month_label"),
            ("unemployment", "@unemployment"),
        ]
    hover = HoverTool(tooltips=tooltips)

    tools = [
        hover,
        CrosshairTool(),
        PanTool(),
        ResetTool(),
        ResizeTool(),
        SaveTool(),
        UndoTool(),
        WheelZoomTool(),
    ]
    return tools

In [43]:
tools = make_tools()
# the x-axis holds datetimes, so bokeh has to be told to format it that way
unemployment_figure = figure(
    title="Portland Unemployment (2007-2017)",
    tools=tools,
    x_axis_type="datetime",
    plot_height=FIGURE_HEIGHT,
    plot_width=FIGURE_WIDTH,
)

In [44]:
# the Portland unemployment rate
unemployment_figure.line(
    "month_data", "unemployment",
    legend=PORTLAND_LABEL,
    line_color=PORTLAND_COLOR,
    source=portland_source,
)

# the national rate, drawn on the same axes for comparison
line = unemployment_figure.line(
    "month_data", "unemployment",
    legend=NATIONAL_LABEL,
    line_color=NATIONAL_COLOR,
    source=national_source,
)

Now to add the recession. Bokeh doesn't really have a way to specify a location using the datetime value (or the index) so there's an ugly hack to use the timestamp.

In [45]:
# multiplier applied to the epoch timestamps (in seconds) before they are used as plot locations
TIME_SCALE = 1000

In [47]:
def scale_timestamp(index):
    """gets the scaled timestamp for element location

    The callers pass in positions found with ``numpy.where``, so the row is
    looked up by position (``iloc``) rather than by index label. The two are
    only the same thing while the frame has the default 0..n-1 index.

    Args:
     index (int): row position in the portland.datetime series
    Returns:
     float: epoch timestamp multiplied by TIME_SCALE, used to locate place in plot
    """
    return portland.datetime.iloc[index].timestamp() * TIME_SCALE

In [48]:
def make_recession():
    """Builds the shaded box that marks the recession

    A new annotation is built on every call.

    Returns:
     BoxAnnotation spanning the start to the end of the recession
    """
    left_edge = scale_timestamp(recession_start)
    right_edge = scale_timestamp(recession_end)
    return BoxAnnotation(left=left_edge,
                         right=right_edge,
                         fill_color="blue",
                         fill_alpha=0.1)

In [49]:
def make_vertical(location, color="darkorange"):
    """Builds a vertical line to mark a point on the x-axis

    Args:
     location: place on the x-axis for the line
     color (str): line-color for the line
    Returns:
     Span at the location
    """
    span_settings = dict(
        location=location,
        line_color=color,
        # a "height" span runs up and down the plot
        dimension="height",
    )
    return Span(**span_settings)

In [50]:
def make_verticals(fig):
    """Adds a vertical line to the figure for each significant point

    Args:
     fig: figure to add the lines to
    """
    # (row positions, line color): only the first position of each is marked
    markers = (
        (unemployment_peaks, "darkorange"),
        (s_and_p_nadir, "crimson"),
        (housing_nadir, "limegreen"),
        # national_peak is the tuple from numpy.where, so unwrap it first
        (national_peak[0], "grey"),
    )
    for positions, color in markers:
        vertical = make_vertical(location=scale_timestamp(positions[0]),
                                 color=color)
        fig.add_layout(vertical)
    return

In [51]:
# shade the recession on the unemployment figure
unemployment_figure.add_layout(make_recession())

Next, I'll add the peaks.

In [52]:
# mark the unemployment peaks and the S & P and housing nadirs with vertical lines
make_verticals(unemployment_figure)

Now to clean things up a little.

In [53]:
# turn the grid off in both directions
unemployment_figure.ygrid.visible = False
unemployment_figure.xgrid.visible = False
# label the axes
unemployment_figure.xaxis.axis_label = "Month"
unemployment_figure.yaxis.axis_label = "% Unemployment"

Now the S & P 500 Index.

In [54]:
# the hover tool shows the month label and the value of the index
hover = HoverTool(
    tooltips=[("Month", "@month_label"),
              ("Value", "@value")],
)
# every figure gets its own tool objects
tools = [hover] + [
    make_tool() for make_tool in (
        CrosshairTool,
        PanTool,
        ResetTool,
        ResizeTool,
        SaveTool,
        UndoTool,
        WheelZoomTool,
    )
]
s_and_p_figure = figure(
    title="S & P 500 Index",
    tools=tools,
    x_axis_type="datetime",
    # sharing the x-range keeps this figure in step with the unemployment figure
    x_range=unemployment_figure.x_range,
    plot_height=FIGURE_HEIGHT,
    plot_width=FIGURE_WIDTH,
)
line = s_and_p_figure.line(
    "month_data", "value",
    line_color=S_AND_P_COLOR,
    source=s_and_p_source,
)

In [55]:
# shade the recession on the S & P figure (a new annotation is built for each figure)
s_and_p_figure.add_layout(make_recession())

In [56]:
# add the same vertical markers that are on the unemployment figure
make_verticals(s_and_p_figure)

In [57]:
# turn the grid off in both directions
s_and_p_figure.ygrid.visible = False
s_and_p_figure.xgrid.visible = False
# label the axes
s_and_p_figure.xaxis.axis_label = "Month"
s_and_p_figure.yaxis.axis_label = "S & P 500 Valuation"

The S & P Index climbs to the right, so the legend has to go somewhere else.

In [58]:
# NOTE(review): the line on this figure was drawn without a ``legend`` argument, so there may be no legend for this to move -- confirm
s_and_p_figure.legend.location = "bottom_right"

Finally the House Price Index

In [59]:
# the hover tool shows the month label and the price
hover = HoverTool(
    tooltips=[("Month", "@month_label"),
              ("Price", "@price")],
)
# every figure gets its own tool objects
tools = [hover] + [
    make_tool() for make_tool in (
        CrosshairTool,
        PanTool,
        ResetTool,
        ResizeTool,
        SaveTool,
        UndoTool,
        WheelZoomTool,
    )
]
housing_figure = figure(
    title="House Price Index",
    tools=tools,
    x_axis_type="datetime",
    # sharing the x-range keeps this figure in step with the unemployment figure
    x_range=unemployment_figure.x_range,
    plot_height=FIGURE_HEIGHT,
    plot_width=FIGURE_WIDTH,
)
line = housing_figure.line(
    "month_data", "price",
    line_color=HOUSING_COLOR,
    source=housing_source,
)

In [60]:
# shade the recession and add the same vertical markers as on the other figures
housing_figure.add_layout(make_recession())
make_verticals(housing_figure)

In [61]:
# turn the grid off in both directions
housing_figure.ygrid.visible = False
housing_figure.xgrid.visible = False
# label the axes
housing_figure.xaxis.axis_label = "Month"
housing_figure.yaxis.axis_label = "Sale Price ($1,000)"

As with the S & P index, the house index climbs, so it has to be moved.

In [62]:
# NOTE(review): the line on this figure was drawn without a ``legend`` argument, so there may be no legend for this to move -- confirm
housing_figure.legend.location = "bottom_right"

## The Final Plot

The figures are combined into a single plot using the ``column`` function.

In [63]:
# stack the three figures vertically: unemployment on top, then the S & P index, then housing
combined = column(unemployment_figure, s_and_p_figure, housing_figure)

## First Save it as a Fragment to embed elsewhere

The first time I tried it the javascript raised some errors. This is to see if putting it before any other plotting will help it.

In [65]:
from bokeh.resources import CDN
from bokeh.embed import autoload_static

In [66]:
# where to save the javascript for the plot and the html tag that loads it
PATH_TO_JAVASCRIPT = "portland_unemployment.js"
# NOTE(review): "emed" looks like a typo for "embed" -- check what reads this file before renaming it
PATH_TO_HTML = "portland_unemployment_emed.html"

In [67]:
# get the javascript for the plot and the <script> tag that loads it from PATH_TO_JAVASCRIPT
# (nothing is written to disk here, the next cells save the two strings)
plot_javascript, tag = autoload_static(combined, CDN, PATH_TO_JAVASCRIPT)

In [68]:
# Save the javascript for the plot. The encoding is set explicitly so the
# file doesn't depend on the platform's default encoding.
with open(PATH_TO_JAVASCRIPT, "w", encoding="utf-8") as writer:
    writer.write(plot_javascript)

In [69]:
# save the <script> tag so it can be pasted into the page that embeds the plot
with open(PATH_TO_HTML, "w") as writer:
    writer.write(tag)

In [70]:
# display the stacked figures
show(combined)

I haven't figured out how to label them yet, so I'll say here that the green vertical line is the house-price nadir, the grey line is the maximum national unemployment, the orange line is the first time Portland, Oregon unemployment hit its maximum (it hit that level twice), and the red line is the S & P 500 nadir. The blue area is the recession during this period.

## Saving the Plot

This is (hopefully) how to embed the plot into a static page.

In [78]:
from bokeh.resources import CDN
from bokeh.embed import autoload_static

In [85]:
# file-name for the plot's javascript (the same name as PATH_TO_JAVASCRIPT in the earlier section)
OUTPUT_JAVASCRIPT = "portland_unemployment.js"

In [86]:
# get the javascript and the <script> tag again (this re-binds ``tag`` from the earlier section)
js, tag = autoload_static(combined, CDN, OUTPUT_JAVASCRIPT)

In [82]:
# show the <script> tag that has to be pasted into the html
print(tag)


<script
    src="portland_unemployment.js"
    id="bf348e92-112c-4b6c-aced-9477a49e796c"
    data-bokeh-model-id="c5aee757-54bc-4c05-ac0e-7e68f574cdde"
    data-bokeh-doc-id="ca6171db-e64d-4d5a-bd81-7f20314822d4"
></script>


In [84]:
!ls | grep portland

portland_and_national_unemployment.org
portland_hillsboro_vancouver_2007_2017.csv
portland_unemployment_2007_2017.csv
portland_unemployment_data.csv
portland-unemployment-with-holoviews.ipynb


It looks like it doesn't actually save the javascript. You probably have to save it yourself.

In [87]:
# Save the javascript for the plot. The encoding is set explicitly so the
# file doesn't depend on the platform's default encoding.
with open(OUTPUT_JAVASCRIPT, "w", encoding="utf-8") as writer:
    writer.write(js)

Insert the contents of the ``tag`` into the html that you want the plot embedded into and copy the ``portland_unemployment.js`` to the folder where the html will look for javascript files (add a path if it's in a folder).

I'm going to shut this notebook down to switch git branches so I'll save the tag as well.

In [88]:
# keep a copy of the <script> tag on disk so it survives shutting the notebook down
with open("portland_tag.html", 'w') as writer:
    writer.write(tag)