In [1]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import init_notebook_mode

In [2]:
# Initialise Plotly's offline notebook mode before any figure is drawn with py.iplot.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather than
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

### Data Cleaning

In [3]:
# path to dataset
# The folder can be overridden with the SME_FORECAST_DATA_PATH environment
# variable so the notebook also runs outside the original author's machine;
# when the variable is unset the original local folder is used.
data_path = os.path.join(
    os.environ.get(
        'SME_FORECAST_DATA_PATH',
        '/Users/madana/Documents/Small-Business-Capstone/Models and Forecasting/Time Series/Forecasted Data/',
    ),
    '',  # guarantees a trailing separator so `data_path + file_name` keeps working
)


def _read_forecast(file_name):
    """Read one forecast CSV from ``data_path``, using its first column as the index."""
    return pd.read_csv(data_path + file_name, index_col=0)


# country level
us_sme_contract = _read_forecast('us_contract_establish_forecast.csv')
us_sme_end = _read_forecast('us_end_establish_forecast.csv')
us_sme_expand = _read_forecast('us_expand_establish_forecast.csv')
us_sme_open = _read_forecast('us_open_establish_forecast.csv')
# state level
state_sme_contract = _read_forecast('contract_establish_forecast_by_state.csv')
state_sme_end = _read_forecast('end_establish_forecast_by_state.csv')
state_sme_expand = _read_forecast('expand_establish_forecast_by_state.csv')
state_sme_open = _read_forecast('open_establish_forecast_by_state.csv')

In [4]:
# Full state name -> two-letter postal abbreviation for the 50 US states
# (no District of Columbia or territories), listed alphabetically.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [5]:
def _year_month(stamp):
    """Return the first seven characters of ``str(stamp)``.

    Presumably this is the 'YYYY-MM' prefix of an ISO-style date held in 'ds'.
    """
    return str(stamp)[:7]


# Give every forecast table a 'time' column derived from its 'ds' column.
for forecast_df in (us_sme_contract, us_sme_end, us_sme_expand, us_sme_open,
                    state_sme_contract, state_sme_end, state_sme_expand, state_sme_open):
    forecast_df['time'] = forecast_df['ds'].apply(_year_month)

### Build Maps

In [6]:
# data_dict maps a region label ("USA Average" or a state name) to the series
# plotted for it: a shared timestamp plus observed / predicted values for the
# contract, end, expand and open tables.
data_dict = {}

# US data
us_entry = {
    'timestamp': us_sme_contract["time"],
    'contract_obs': us_sme_contract["actual data"],
    'contract_pred': us_sme_contract["yhat"],
    'end_obs': us_sme_end["actual data"],
    'end_pred': us_sme_end["yhat"],
    'expand_obs': us_sme_expand["actual data"],
    'expand_pred': us_sme_expand["yhat"],
    'open_obs': us_sme_open["actual data"],
    'open_pred': us_sme_open["yhat"],
}
data_dict["USA Average"] = us_entry

# states data
for state in state_sme_contract.state.unique():
    # Rows of each state-level table that belong to the current state.
    contract_rows = state_sme_contract[state_sme_contract["state"] == state]
    end_rows = state_sme_end[state_sme_end["state"] == state]
    expand_rows = state_sme_expand[state_sme_expand["state"] == state]
    open_rows = state_sme_open[state_sme_open["state"] == state]

    # Every series is re-indexed from zero; the timestamp comes from the contract table.
    state_entry = {
        'timestamp': contract_rows["time"].reset_index(drop=True),
        'contract_obs': contract_rows["actual data"].reset_index(drop=True),
        'contract_pred': contract_rows["yhat"].reset_index(drop=True),
        'end_obs': end_rows["actual data"].reset_index(drop=True),
        'end_pred': end_rows["yhat"].reset_index(drop=True),
        'expand_obs': expand_rows["actual data"].reset_index(drop=True),
        'expand_pred': expand_rows["yhat"].reset_index(drop=True),
        'open_obs': open_rows["actual data"].reset_index(drop=True),
        'open_pred': open_rows["yhat"].reset_index(drop=True),
    }
    data_dict[state] = state_entry

In [7]:
# prepare data
data = []
button_index = {}
state_cnt = 0
for state in data_dict.keys():
    # Contract
    trace_contract_obs = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['contract_obs'],
                                    name = state + ' contract observed',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(255,97,3)'), width = 4))
    trace_contract_pred = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['contract_pred'],
                                    name = state + ' contract estimated',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(255,97,3)'), width = 4, dash = 'dash'))
    # End
    trace_end_obs = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['end_obs'],
                                    name = state + ' end observed',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(205,51,51)'), width = 4))
    trace_end_pred = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['end_pred'],
                                    name = state + ' end estimated',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(205,51,51)'), width = 4, dash = 'dash'))
    # Expand
    trace_expand_obs = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['expand_obs'],
                                    name = state + ' expand observed',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(61,145,64)'), width = 4))
    trace_expand_pred = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['expand_pred'],
                                    name = state + ' expand estimated',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(61,145,64)'), width = 4, dash = 'dash'))
    # Open
    trace_open_obs = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['open_obs'],
                                    name = state + ' open observed',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(22, 96, 167)'), width = 4))
    trace_open_pred = go.Scatter(x = data_dict[state]['timestamp'], y = data_dict[state]['open_pred'],
                                    name = state + ' open estimated',
                                    text = "State: " + state,
                                    line = dict(color = ('rgb(22, 96, 167)'), width = 4, dash = 'dash'))
    data.extend([trace_contract_obs, trace_contract_pred, trace_end_obs, trace_end_pred, 
                trace_expand_obs, trace_expand_pred, trace_open_obs, trace_open_pred])
    button_index[state] = (0 + 8*state_cnt)
    state_cnt += 1

In [8]:
# Visibility masks with one flag per trace in `data`: everything hidden / everything shown.
data_bullean_list = [False] * len(data)
all_states_bullean_list = [True] * len(data)

# prepare button list
# The first button shows every trace; each later button shows only one region's eight traces.
button_list = [dict(label="All states", method='update',
                    args=[{'visible': all_states_bullean_list},
                          {'title': "SME Trend in United States"}])]
for state in data_dict.keys():
    state_idx = button_index[state]
    # Start from the all-hidden mask and switch on this region's block of traces.
    state_bullean = list(data_bullean_list)
    state_bullean[state_idx:state_idx+8] = [True] * 8
    state_button = dict(label=str(state), method='update',
                        args=[{'visible': state_bullean}, {'title': str(state) + " SME Trend"}])
    button_list.append(state_button)

# Single dropdown menu holding all buttons, with "All states" preselected.
updatemenus = [dict(active=0, buttons=button_list,
                    direction='down', x=0.128, y=1.1, yanchor='top')]

In [9]:
# setting layout
layout = dict(title = 'SME Trend in United States',
              xaxis = dict(title = 'Time', rangeslider=dict(visible = True)),
              yaxis = dict(title = 'Value'),
              annotations = [dict(x="2004-09", y=0, arrowcolor="rgba(63, 81, 181, 0.2)", arrowsize=0.3,
                             ax=0, ay=-120, text="Historical", xref="x", yanchor="bottom",yref="y"),
                             dict(x="2019-6", y=0, arrowcolor="rgba(76, 175, 80, 0.1)", arrowsize=0.3,
                             ax=0, ay=-120, text="Estimation", xref="x", yanchor="bottom",yref="y")],
              shapes = [dict(fillcolor="rgba(63, 81, 181, 0.2)", line={"width": 0}, type="rect",
                        x0="1992-09", x1="2017-12", xref="x", y0=0, y1=0.95, yref="paper"), 
                        dict(fillcolor="rgba(76, 175, 80, 0.1)", line={"width": 0}, type="rect",
                        x0="2017-12", x1="2021-03", xref="x", y0=0, y1=0.95, yref="paper")],
             updatemenus=updatemenus)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='update-line-by-button')