In [49]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np


%matplotlib inline
# Notebook-wide display defaults: let pandas print up to 500 rows and make
# matplotlib figures 16x9 by default.
pd.options.display.max_rows = 500
mpl.rcParams['figure.figsize'] = (16, 9)

import plotly.graph_objects as go
import plotly.io as pio

![CRISP_DM](../reports/figures/CRISP_DM.png)

In [50]:
# Load the processed flat table (';'-separated). parse_dates=[0] converts the
# first column to datetimes at read time, so no later conversion is needed.
df_analyse = pd.read_csv(
    '../data/processed/COVID_small_flat_table.csv',
    sep=';',
    parse_dates=[0],
)

# Show the most recent rows as a quick sanity check of the load.
df_analyse.sort_values('date', ascending=True).tail()

Unnamed: 0,date,Italy,India,Germany,US,Spain
202,2020-08-11,251237,2329638,219540,5141208,326612
203,2020-08-12,251713,2396637,220859,5197411,329784
204,2020-08-13,252235,2461190,222281,5248958,337334
205,2020-08-14,252809,2525922,223791,5313252,342813
206,2020-08-15,253438,2589952,224488,5361165,342813


# Helper Functions

In [51]:
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """Draw every column of ``df_input`` against ``x_in`` in one Plotly figure.

    Parameters
    ----------
    x_in : array-like
        x values shared by all traces.
    df_input : pandas.DataFrame
        One ``go.Scatter`` trace is added per column, named after the column.
    y_scale : str, default 'log'
        Passed to ``fig.update_yaxes(type=...)`` (e.g. 'log' or 'linear').
    slider : bool, default False
        When truthy, a range slider is shown below the x axis.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and not returned.
    """
    fig = go.Figure()

    for column in df_input.columns:
        fig.add_trace(go.Scatter(x=x_in,
                                 y=df_input[column],
                                 name=column,
                                 opacity=0.8))

    fig.update_layout(
        autosize=True,
        width=1024,
        height=768,
        font=dict(family="PT Sans, monospace", size=18, color="#7f7f7f"),
    )
    # The original line ended in a stray comma, which wrapped the call in a
    # discarded one-element tuple; it is a plain statement now.
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(
        tickangle=-45,
        nticks=20,
        tickfont=dict(size=14, color="#7f7f7f"),
    )

    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    
    

In [52]:
# Raw timelines: calendar date on x, one trace per country column
# (column 0 is the date itself, so it is left out of the y data).
quick_plot(
    x_in=df_analyse['date'],
    df_input=df_analyse.iloc[:, 1:],
    y_scale='log',
    slider=True,
)

In [53]:
# Only values strictly greater than this count are kept when the country
# timelines are synchronised below.
threshold = 100

In [54]:
# For each country column (everything after the leading date column) keep only
# the values above `threshold`. Each array therefore starts at that country's
# first value above the threshold, and the arrays may differ in length.
compare_list = [
    np.array(df_analyse[country][df_analyse[country] > threshold])
    for country in df_analyse.columns[1:]
]

In [55]:
# One row per country; shorter series are padded with NaN on the right.
pd.DataFrame(data=compare_list, index=df_analyse.columns[1:])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,165,166,167,168,169,170,171,172,173,174
Italy,155,229,322,453,655,888,1128,1694,2036,2502,...,249204.0,249756.0,250103.0,250566.0,250825.0,251237.0,251713.0,252235.0,252809.0,253438.0
India,102,113,119,142,156,194,244,330,396,499,...,,,,,,,,,,
Germany,130,159,196,262,482,670,799,1040,1176,1457,...,222281.0,223791.0,224488.0,,,,,,,
US,104,174,222,337,451,519,711,1109,1561,2157,...,,,,,,,,,,
Spain,120,165,222,259,400,500,673,1073,1695,2277,...,342813.0,342813.0,,,,,,,,


In [56]:
# Transpose so that each country becomes a column and each row is one step
# after that country passed the threshold.
pd_sync_timelines = pd.DataFrame(
    data=compare_list,
    index=df_analyse.columns[1:],
).transpose()

In [58]:
# 'date' here is a running row counter (0, 1, 2, ...), not a calendar date;
# it serves as the shared x axis of the synchronised timelines.
pd_sync_timelines['date'] = np.arange(len(pd_sync_timelines))

In [60]:
# Peek at the first five synchronised rows.
pd_sync_timelines.head(n=5)

Unnamed: 0,Italy,India,Germany,US,Spain,date
0,155.0,102.0,130.0,104.0,120.0,0
1,229.0,113.0,159.0,174.0,165.0,1
2,322.0,119.0,196.0,222.0,222.0,2
3,453.0,142.0,262.0,337.0,259.0,3
4,655.0,156.0,482.0,451.0,400.0,4


In [59]:
# 'date' is the LAST column of pd_sync_timelines, so the y data is selected by
# dropping that column by name. The previous iloc[:, 1:] skipped the first
# country and plotted the 'date' counter as if it were a country.
quick_plot(pd_sync_timelines.date,
           pd_sync_timelines.drop(columns='date'),
           y_scale='log',
           slider=True)

# Exponential slopes

In [63]:
def doubling_rate(N_0, t, T_d):
    """Return the exponential growth curve ``N_0 * 2 ** (t / T_d)``.

    Parameters
    ----------
    N_0 : number
        Value at ``t == 0``.
    t : number or array-like
        Time point(s), in the same unit as ``T_d``.
    T_d : number
        Doubling time: the value doubles every ``T_d`` units of ``t``.

    Returns
    -------
    Same shape as ``t``: the curve evaluated at each time point.
    """
    doublings_elapsed = t / T_d
    growth_factor = np.power(2, doublings_elapsed)
    return N_0 * growth_factor

In [67]:
# NOTE(review): max_days is not referenced by the cells visible here; the
# reference curves below are sampled on a fixed 20-step range.
max_days = 34

# Reference growth curves starting at 100, one per doubling time
# (a one-day doubling curve was tried and deliberately left out).
norm_slopes = {
    label: doubling_rate(100, np.arange(20), doubling_time)
    for label, doubling_time in (
        ('doubling every two days', 2),
        ('doubling every 4 days', 4),
        ('doubling every 10 days', 10),
    )
}

In [68]:
# Place the reference curves to the left of the synchronised country timelines;
# rows are aligned on the shared integer index.
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slopes), pd_sync_timelines],
    axis='columns',
)

In [69]:
# Plot every reference curve and every country against the 'date' counter.
# 'date' is the last column, so it is dropped by name. The previous
# iloc[:, 1:] hid 'doubling every two days' and plotted 'date' as a trace.
quick_plot(pd_sync_timelines_w_slope.date,
           pd_sync_timelines_w_slope.drop(columns='date'),
           y_scale='log',
           slider=True)