## Environment setup

In [1]:
import pandas as pd
from datetime import date
from os import path

from bokeh.plotting import figure, output_notebook, output_file, show
from bokeh.layouts import column, row
from bokeh.models import HoverTool, ColumnDataSource, Select
from bokeh.models.callbacks import CustomJS
from bokeh.palettes import Colorblind

## Methods that return our datasets

In [2]:
def get_us_dataset():
    """Return the US confirmed-cases table, downloading it at most once per day.

    The upstream CSV is cached locally as
    ``../data/raw/us_confirmed_<today's ISO date>.csv``. When today's file is
    missing, the data is downloaded and written there first (creating the
    cache directory if needed). The returned frame is always read back from
    that local file.

    Returns:
        pandas.DataFrame: the CSV contents, with missing ``Admin2`` values
        replaced by the string ``"None"``.
    """
    import os  # local import: the notebook's import cell only brings in os.path

    filename = "../data/raw/us_confirmed_%s.csv" % date.today().isoformat()

    # If we don't already have a file today, download it and write it to the data directory
    if not path.exists(filename):
        covid_us = pd.read_csv('https://github.com/datasets/covid-19/raw/master/data/us_confirmed.csv')
        # The cache directory may not exist yet (e.g. on a fresh checkout);
        # to_csv does not create missing parent directories.
        os.makedirs(path.dirname(filename), exist_ok=True)
        covid_us.to_csv(filename, index=False)

    # Read the US data from today's CSV into a big dataframe
    covid_us = pd.read_csv(filename)

    covid_us['Admin2'] = covid_us['Admin2'].fillna("None")

    return covid_us

In [3]:
def state_data(window=7, data=None):
    """Build per-state tables of total cases, new cases and their rolling mean.

    Args:
        window: Number of consecutive rows (dates) averaged in the rolling
            mean of new cases.
        data: DataFrame with 'Date', 'Province/State' and 'Case' columns.
            When omitted, the notebook-level ``df`` is used.

    Returns:
        A tuple ``(total_cases, new_cases, rolling_avg)`` of DataFrames. Each
        has a datetime 'Date' column followed by one column per distinct
        'Province/State' value:
          * total_cases - sum of 'Case' over all rows for that date and state
          * new_cases   - row-to-row difference of total_cases
          * rolling_avg - rolling mean of new_cases over ``window`` rows
    """
    if data is None:
        # Fall back to the frame loaded by the notebook (df = get_us_dataset())
        data = df

    # Sum 'Case' per (date, state), then turn the inner index (state) into the columns
    total_cases = data.groupby(['Date', 'Province/State'])['Case'].sum().unstack()

    # The index is not automatically recognized as a datetime;
    # the graph needs datetime values to display correctly
    total_cases.index = pd.to_datetime(total_cases.index)

    # New daily cases and their rolling average
    new_cases = total_cases.diff()
    rolling_avg = new_cases.rolling(window).mean()

    # Move the date index back into an ordinary 'Date' column on each table
    return tuple(table.reset_index() for table in (total_cases, new_cases, rolling_avg))

## Download the dataset

In [4]:
df = get_us_dataset()

## Create the dashboard

In [5]:
# Six-entry Colorblind palette; the plots pick their line colours from it
palette = Colorblind[6]

# Direct Bokeh output to a standalone HTML file
output_file("../state_comparison.html")

#########
# Data sources for the per-state tables
# (one column per state plus a 'Date' column in each)
#########
total_cases, new_cases, rolling_avg = state_data()
totals_source, new_cases_source, rolling_avg_source = (
    ColumnDataSource(data=table)
    for table in (total_cases, new_cases, rolling_avg)
)

#########
# List of states offered in the dropdown selection boxes
#########
state_list = df['Province/State'].unique().tolist()

######
# Hover tool added to the plots
######
hovertool = HoverTool(
    tooltips=[
        ('date', '@date{%F}'),
        ('New Cases', '@new_cases'),
        ('7 Day Avg', '@rolling_avg'),
    ],
    # Render the 'date' field with the 'datetime' formatter
    formatters={'@date': 'datetime'},
    # mode='vline' (tooltip whenever the cursor is vertically in line with a
    # glyph) is intentionally left disabled
)




##########
# Function that builds one plot together with its state selection box
##########

def create_plot(state="District of Columbia"):
    """Build one state's case plot and the dropdown that re-targets it.

    Args:
        state: Name of the state column to show initially. It must be a key
            of the shared totals / new-cases / rolling-average sources; it is
            also used as the initial plot title and dropdown value.

    Returns:
        A ``(figure, Select)`` pair. Changing the Select's value runs a
        browser-side callback that copies the chosen state's series into
        this plot's own data source and updates the plot title.
    """
    # Source private to this plot, holding only the displayed state's series
    plot_source = ColumnDataSource(data={
        "date": total_cases['Date'],
        "total": totals_source.data[state],
        "new_cases": new_cases_source.data[state],
        "rolling_avg": rolling_avg_source.data[state],
    })

    # Figure with one line per displayed series: (field, colour, alpha, legend)
    fig = figure(title=state, plot_width=600, plot_height=300, x_axis_type="datetime")
    line_specs = (
        ("new_cases", palette[1], 0.5, "New Cases"),
        ("rolling_avg", palette[0], 0.8, "7 Day Average"),
    )
    for field, colour, opacity, label in line_specs:
        fig.line(x='date', y=field, source=plot_source,
                 color=colour, line_width=2, alpha=opacity, legend_label=label)
    fig.add_tools(hovertool)
    fig.legend.location = "top_left"

    # Dropdown used to choose which state this plot shows
    picker = Select(title="Choose a State:", value=state, options=state_list)

    # Browser-side handler: overwrite this plot's arrays element by element
    # with the selected state's values, retitle the plot, then signal the change
    on_change_js = """
        var state = cb_obj.value;
        var data = source.data;

        var title = state_plot.title;
        title.text = state;
 
               
        for (var i = 0; i < data['total'].length; i++) {
            data['total'][i] = total_source.data[state][i];
            data['new_cases'][i] = new_source.data[state][i];
            data['rolling_avg'][i] = rolling_source.data[state][i];
        }
    
        source.change.emit()
    """
    handler = CustomJS(
        args={
            "source": plot_source,
            "total_source": totals_source,
            "new_source": new_cases_source,
            "rolling_source": rolling_avg_source,
            "state_plot": fig,
        },
        code=on_change_js,
    )
    picker.js_on_change('value', handler)

    return fig, picker


###########
# Build four plots, each with its own state selector,
# and arrange them in a 2x2 grid
###########

(splot1, ss1), (splot2, ss2), (splot3, ss3), (splot4, ss4) = (
    create_plot(name)
    for name in ("New York", "District of Columbia", "Florida", "California")
)

# Each tile stacks the dropdown above its plot
tile1, tile2, tile3, tile4 = (
    column(picker, plot)
    for picker, plot in ((ss1, splot1), (ss2, splot2), (ss3, splot3), (ss4, splot4))
)

# Show the results: two rows of two tiles
top_row = row(tile1, tile2)
bottom_row = row(tile3, tile4)
show(column(top_row, bottom_row))

In [6]:
from bs4 import BeautifulSoup

In [7]:
# Parse the generated dashboard HTML. The context manager closes the file
# handle once parsing is done, and the explicit UTF-8 encoding avoids
# depending on the platform default (the file is written back as UTF-8
# later in the notebook).
with open('../state_comparison.html', encoding="utf-8") as html_file:
    soup = BeautifulSoup(html_file, "html.parser")

In [8]:
# Replace the text of the parsed page's <title> element.
# NOTE(review): Bokeh's output_file() appears to accept a `title` argument,
# which might make this HTML post-processing unnecessary — confirm.
soup.title.string = "Covid US State Comparison"

In [10]:
# Serialise the edited document as pretty-printed UTF-8 bytes and
# overwrite the dashboard file with it
html = soup.prettify(encoding="utf-8")
with open("../state_comparison.html", mode="wb") as out_file:
    out_file.write(html)