# COVID2019
Justin Fackrell
Uses data from Johns Hopkins https://github.com/CSSEGISandData/COVID-19

In [None]:
import pandas as pd
from pathlib import Path


def get_data(file, data_dir=None):
    """Load one JHU CSSE time-series CSV and return it transposed.

    Args:
        file: File name of the CSV inside the time-series directory.
        data_dir: Directory containing the CSV. Defaults to the
            ``csse_covid_19_time_series`` folder of a checkout of
            https://github.com/CSSEGISandData/COVID-19 located next to the
            current working directory's parent.

    Returns:
        A DataFrame whose rows are the remaining CSV columns (one per date
        column in the file) and whose columns are a MultiIndex of
        ``('Province/State', 'Country/Region')``. The ``Lat`` and ``Long``
        columns are dropped.
    """
    if data_dir is None:
        # Build the path from parts rather than a backslash-separated string:
        # backslashes are only treated as separators on Windows, so the
        # original literal resolved to a single bogus file name elsewhere.
        data_dir = Path("..", "COVID-19", "csse_covid_19_data", "csse_covid_19_time_series")

    df = pd.read_csv(Path(data_dir) / file)
    df = df.drop(columns=['Lat', 'Long'])
    df = df.set_index(['Province/State', 'Country/Region'])
    # Transpose so that each region becomes a column and each date a row.
    return df.T

# Load the global confirmed-case and death time series in one pass; the two
# files are read in the same order as before (confirmed first, then deaths).
df_confirmed_raw, df_deaths_raw = map(
    get_data,
    (
        "time_series_covid19_confirmed_global.csv",
        "time_series_covid19_deaths_global.csv",
    ),
)

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots



def prepro(country2states, df, info):
    """Extract selected regions and their row-to-row differences.

    Args:
        country2states: Mapping of country name to a list of state names.
            Each (state, country) pair is used as a column key into ``df``.
        df: DataFrame whose columns are keyed by ``(state, country)``.
        info: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(values, diffs)`` of DataFrames. Both have one column per
        requested region, labelled ``"<country> <state>"`` (or just
        ``"<country>"`` when the state is null). ``diffs`` holds the
        first difference of each column of ``values``.
    """
    idx = pd.IndexSlice

    cumulative = {}
    daily = {}
    for country, states in country2states.items():
        for state in states:
            # A null state contributes nothing to the label; strip() then
            # removes the separating space that would otherwise trail.
            suffix = '' if pd.isnull(state) else state
            label = f"{country} {suffix}".strip()

            series = pd.Series(df.loc[:, idx[state, country]])
            cumulative[label] = series
            daily[label] = series.diff()

    return pd.DataFrame(data=cumulative), pd.DataFrame(data=daily)
    
    
def plot_confirmed_plus_deaths(df_confirmed, df_deaths, info, start_date=None):
    """Plot confirmed cases (top) and deaths (bottom) with matplotlib.

    Args:
        df_confirmed: DataFrame with one column per region; the index must be
            parseable by ``pd.to_datetime``.
        df_deaths: Same layout as ``df_confirmed``, plotted in the lower panel.
        info: Unused; kept so existing callers keep working.
        start_date: If given (and truthy), only rows strictly after this date
            are plotted. Anything ``pd.to_datetime`` accepts.
    """

    def _plot(df, title):
        # Parse the index on a copy so the caller's frame is left untouched.
        # Comparing the raw index labels against a date string would be a
        # lexicographic string comparison, not a chronological one.
        df = df.copy()
        df.index = pd.to_datetime(df.index)

        if start_date:
            df = df.loc[df.index > pd.to_datetime(start_date)]

        for state in df.columns:
            plt.plot(df.index, df[state], label=f"{state}")

        plt.legend()
        plt.title(title)

    plt.figure(figsize=(20, 10))
    plt.subplot(2, 1, 1)
    _plot(df_confirmed, 'confirmed')
    plt.subplot(2, 1, 2)
    _plot(df_deaths, 'deaths')

    # Applies to the current (lower) subplot only.
    plt.xlabel('date')
    

    
def plot_confirmed_plus_deaths_fancy(df_confirmed, df_deaths, info, start_date=None, events=None, logy=False, lag=None):
    """Show confirmed cases (row 1) and deaths (row 2) as a plotly figure.

    Args:
        df_confirmed: DataFrame with one column per region; the index must be
            parseable by ``pd.to_datetime``.
        df_deaths: Same layout as ``df_confirmed``, drawn in the second row.
        info: Unused; kept so existing callers keep working.
        start_date: If given (and truthy), only rows strictly after this date
            are drawn.
        events: Optional mapping of column name to a list of entries whose
            first item is a date and second item is a text label. Each event
            is drawn as a labelled marker on the first sample strictly after
            its date. Events with no later sample are skipped.
        logy: If true, use a logarithmic y axis on both rows.
        lag: Optional offset (e.g. ``pd.Timedelta``). When truthy, every event
            gets a second marker shifted by ``lag`` on the x axis, at the same
            y value, labelled ``"LAG:<text>"``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Avoid a shared mutable default argument.
    events = {} if events is None else events
    palette = px.colors.sequential.Jet

    def _add_event_markers(fig, row, df, state):
        xs = []
        ys = []
        labels = []
        for ev in events[state]:
            when, txt = ev[0], ev[1]
            after = df.loc[df.index > pd.to_datetime(when), state]
            if after.empty:
                # The event lies beyond the plotted data; nothing to anchor to.
                continue
            x0 = after.index[0]
            # .iloc: plain [0] on a datetime-indexed Series is a deprecated
            # positional lookup.
            y0 = after.iloc[0]
            xs.append(x0)
            ys.append(y0)
            labels.append(txt)
            if lag:
                xs.append(x0 + lag)
                ys.append(y0)
                labels.append(f"LAG:{txt}")

        if xs:
            fig.add_trace(go.Scatter(x=xs, y=ys, text=labels, mode="markers+text",
                                     textposition="bottom center", showlegend=False), row=row, col=1)

    def _plot(fig, row, df):
        # Copy before replacing the index so the caller's frame is not mutated.
        df = df.copy()
        df.index = pd.to_datetime(df.index)

        if start_date:
            df = df.loc[df.index > pd.to_datetime(start_date)]

        # Both rows share the same regions, so one legend (from row 1) suffices.
        show_legend = row == 1

        for i, state in enumerate(df.columns):
            # Wrap around the palette so extra columns do not raise IndexError.
            colour = palette[i % len(palette)]
            fig.add_trace(go.Scatter(x=df.index, y=df[state], mode='lines', name=state,
                                     line=go.scatter.Line(color=colour), showlegend=show_legend), row=row, col=1)
            if state in events:
                _add_event_markers(fig, row, df, state)

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
    if logy:
        fig.update_yaxes(type="log")
    fig.update_layout(
        margin=dict(l=20, r=20, t=20, b=20),
        paper_bgcolor="LightSteelBlue",
        autosize=False,
        width=800,
        height=800,)

    _plot(fig, 1, df_confirmed)
    _plot(fig, 2, df_deaths)
    fig.show()

In [None]:
# Lockdown dates per plotted column, as (ISO date, marker text) pairs.
events = {
    "United Kingdom": [("2020-03-22", "UK lockdown")],
    "Norway": [("2020-03-13", "NO lockdown")],
    "China Hubei": [("2020-01-23", "CN lockdown")],
    "Italy": [("2020-03-09", "IT lockdown")],
    "Spain": [("2020-03-14", "ES lockdown")],
}

# Offset used to draw a second, shifted marker for every event.
lag = pd.Timedelta(days=14)

In [None]:
# Regions to plot: country -> list of states (None selects the row that has
# no state set for that country).
country2states = {
    'US': [None],
    'China': ['Hubei'],
    'Italy': [None],
    'Spain': [None],
    'Norway': [None],
    'United Kingdom': [None],
}

df_confirmed, df_confirmed_diff = prepro(country2states, df_confirmed_raw, 'confirmed')
df_deaths, df_deaths_diff = prepro(country2states, df_deaths_raw, 'deaths')

# Plot the day-over-day differences from March 2020 onwards on a log axis,
# with lockdown markers and their lagged counterparts.
plot_confirmed_plus_deaths_fancy(
    df_confirmed_diff,
    df_deaths_diff,
    "diff",
    start_date="2020-03-01",
    events=events,
    logy=True,
    lag=lag,
)