In [None]:
import subprocess
import os

import pandas as pd
import numpy as np

from datetime import datetime

import requests
import json

def get_johns_hopkins():
    ''' Refresh the local Johns Hopkins data set by running ``git pull``.

        The command is executed inside '../data/raw/COVID-19', so the
        repository has to be cloned into that folder beforehand.
        Nothing is returned; the captured stderr and stdout of the git
        call are printed.
    '''
    repo_dir = os.path.dirname('../data/raw/COVID-19/')
    pull_process = subprocess.Popen("git pull",
                                    shell=True,
                                    cwd=repo_dir,
                                    stderr=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
    captured_stdout, captured_stderr = pull_process.communicate()

    # stderr is reported first, then stdout; both are printed as returned by communicate()
    print("Error : {}".format(str(captured_stderr)))
    print("out : {}".format(str(captured_stdout)))


def get_current_data_germany():
    ''' Download the current district (Landkreis) figures for Germany.

        Queries the RKI_Landkreisdaten ArcGIS feature service, collects the
        'attributes' dict of every returned feature in a pd.DataFrame and
        stores it ';'-separated in '../data/raw/NPGEO/GER_state_data.csv'.
        Attention: the API endpoint is not too stable.

        Raises
        ------
        requests.exceptions.RequestException
            if the request times out or the server answers with an HTTP
            error status
    '''
    # 16 states
    #data=requests.get('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3%A4ndern/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json')

    # 400 regions / Landkreise
    # explicit timeout: without one requests waits indefinitely for an unresponsive server
    data=requests.get('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json',
                      timeout=30)
    # report HTTP errors here instead of failing later on a missing 'features' key
    data.raise_for_status()

    json_object=json.loads(data.content)
    full_list=[each_dict['attributes'] for each_dict in json_object['features']]

    pd_full_list=pd.DataFrame(full_list)
    pd_full_list.to_csv('../data/raw/NPGEO/GER_state_data.csv',sep=';')
    print(' Number of regions rows: '+str(pd_full_list.shape[0]))

if __name__ == '__main__':
    # only the Johns Hopkins repository is refreshed; the download of the German data stays disabled
    get_johns_hopkins()
    #get_current_data_germany()


In [None]:
import pandas as pd
import numpy as np

from datetime import datetime


def store_relational_JH_data():
    ''' Reshape the wide Johns Hopkins time series into a relational (long) table

        Reads the global confirmed-cases csv, stacks the date columns into rows
        and writes the columns date, state, country, confirmed
        to '../data/processed/COVID_relational_confirmed.csv' (';' separated).
    '''
    source_csv='../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    wide_table=pd.read_csv(source_csv)

    wide_table=wide_table.rename(columns={'Country/Region':'country',
                                          'Province/State':'state'})
    # rows without a province get the placeholder 'no'
    wide_table['state']=wide_table['state'].fillna('no')
    wide_table=wide_table.drop(['Lat','Long'],axis=1)

    # dates become the row index, then both column levels are stacked into rows
    by_region=wide_table.set_index(['state','country'])
    stacked=by_region.T.stack(level=[0,1])
    long_table=stacked.reset_index().rename(columns={'level_0':'date',
                                                     0:'confirmed'})

    long_table['date']=long_table['date'].astype('datetime64[ns]')

    long_table.to_csv('../data/processed/COVID_relational_confirmed.csv',sep=';',index=False)
    print(' Number of rows stored: '+str(long_table.shape[0]))

if __name__ == '__main__':
    # rebuild the relational csv from the raw Johns Hopkins time series

    store_relational_JH_data()


In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
import pandas as pd 

from scipy import optimize
from scipy import integrate

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns


# seaborn theme used for all matplotlib figures
sns.set(style="darkgrid")

# default figure size (16 x 9) and up to 500 rows shown when a DataFrame is displayed
mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 500)

########################################

def SIR_model(SIR,beta,gamma,N=None):
    '''Simple SIR model
        S: susceptible population
        I: infected population
        R: recovered population
        beta: infection rate
        gamma: recovery rate

        overall condition is that the sum of changes (differences) sums up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)

     Parameters:
        ----------
        SIR : numpy.ndarray
            current state (S, I, R)
        beta: float
        gamma: float
        N: float, optional
            total population size; when omitted it is taken as S+I+R of the
            given state, so no module-level population constant is required.
            Must not be 0.

     Returns:
        ----------
        tuple
            (dS_dt, dI_dt, dR_dt), the rates of change of the three compartments
    '''

    S,I,R = SIR
    if N is None:
        # the population is constant in this model, so the current state defines it
        N=S+I+R
    dS_dt=-beta*S*I/N
    dI_dt=beta*S*I/N-gamma*I
    dR_dt=gamma*I
    return(dS_dt,dI_dt,dR_dt)



if __name__ == '__main__':
    # load the ';'-separated relational csv (first column parsed as dates) and order it by date
    pd_JH_data=pd.read_csv('../data/processed/COVID_relational_confirmed.csv',sep=';',parse_dates=[0])
    pd_JH_data=pd_JH_data.sort_values('date',ascending=True).copy()


In [None]:
import pandas as pd
import numpy as np

import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import plotly.graph_objects as go

import os
# show the working directory, the relative data path below is resolved against it
print(os.getcwd())

# ';'-separated input table; the layout and the callback read its 'country' column
df_input_large=pd.read_csv('../data/processed/COVID_final_set.csv',sep=';')

# empty placeholder figure shown until the callback delivers the first result
fig = go.Figure()

app = dash.Dash()
# Page layout: headline, country selection, the SIR scenario parameters and the result graph.
# The component ids are the ones the figure callback of this notebook listens to.
app.layout = html.Div([

    dcc.Markdown('''
    #  Applied Data Science on COVID-19 data

    Goal of the project is to teach data science by applying a cross industry standard process,
    it covers the full walkthrough of: automated data gathering, data transformations,
    filtering and machine learning to approximating the doubling time, and
    (static) deployment of responsive dashboard.

    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),


    dcc.Dropdown(
        id='country_drop_down',
        options=[ {'label': each,'value':each} for each in df_input_large['country'].unique()],
        value=['US', 'Germany','Italy'], # which are pre-selected
        multi=True
    ),

    dcc.Markdown('''
        ## SIR Parameters
        '''),

    # Duration of the four scenario phases in days.
    # step=1: a number input only accepts min + k*step, so the former step=3
    # (with min=10) marked the defaults 21 and 14 as invalid values.
    dcc.Markdown('''
    ## Initial period(in Days)
    '''),
    dcc.Input(
             id="t_initial", type="number", placeholder="number",
             value=21,min=10, max=100, step=1
    ),

     dcc.Markdown('''
    ## Measures introduced period(in Days)
    '''),
    dcc.Input(
             id="t_intro_measures", type="number", placeholder="number",
             value=14,min=10, max=100, step=1
    ),

     dcc.Markdown('''
    ## Holding period(in Days)
    '''),
    dcc.Input(
             id="t_hold", type="number", placeholder="number",
             value=21,min=10, max=100, step=1
    ),

     dcc.Markdown('''
    ## Relaxation period(in Days)
    '''),
    # min=10 added for consistency with the other periods (rules out negative durations)
    dcc.Input(
             id="t_relax", type="number", placeholder="number",
             value=21,min=10, max=100, step=1
    ),

    # Rates of the SIR model.
    # step=0.01: the former step=3 only allowed 0, 3, 6, ... and therefore
    # rejected the fractional defaults 0.4, 0.11 and 0.1.
     dcc.Markdown('''
    ## Maximum infection rate
    '''),
     dcc.Input(
             id="beta_max", type="number", placeholder="number",
             value=0.4,min=0, max=100, step=0.01
    ),

     dcc.Markdown('''
    ## Minimum infection rate
    '''),
     dcc.Input(
             id="beta_min", type="number", placeholder="number",
             value=0.11,min=0, max=100, step=0.01
    ),

     dcc.Markdown('''
    ## Recovery rate
    '''),
     dcc.Input(
             id="gamma", type="number", placeholder="number",
             value=0.1,min=0, max=100, step=0.01
    ),


    dcc.Graph(figure=fig, id='main_window_slope')
])

def _beta_profile(t_init,t_intro,t_hold,t_relax,bmax,bmin):
    '''Infection rate per simulated day: constant bmax, linear decrease to bmin,
       constant bmin, linear increase back to bmax.'''
    # negative durations are treated as 0 so that np.full / np.linspace cannot fail
    n_init,n_intro,n_hold,n_relax=[max(int(days),0) for days in (t_init,t_intro,t_hold,t_relax)]
    return np.concatenate((np.full(n_init,bmax),
                           np.linspace(bmax,bmin,n_intro),
                           np.full(n_hold,bmin),
                           np.linspace(bmin,bmax,n_relax),
                           ))


def _simulate_infected(I0,N0,betas,gamma):
    '''Explicit one-day steps of the SIR equations; returns the infected count after each step.'''
    # The step is computed here with an explicit population size so that the
    # callback does not depend on any module-level N0.
    S=float(N0-I0)
    I=float(I0)
    infected=[]
    for each_beta in betas:
        new_infections=each_beta*S*I/N0
        # recovered people are not tracked, they do not influence the infected curve
        S,I=S-new_infections,I+new_infections-gamma*I
        infected.append(float(I))
    return infected


@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('t_initial', component_property='value'),
    Input('t_intro_measures',component_property= 'value'),
    Input('t_hold',component_property= 'value'),
    Input('t_relax',component_property= 'value'),
    Input('beta_max',component_property= 'value'),
    Input('beta_min', component_property='value'),
    Input('gamma', component_property='value')])

def update_figure(country_list,t_init,t_intro,t_hold,t_relax,bmax,bmin,gamma):
    ''' Simulate an SIR scenario for every selected country and return the figure

        Parameters:
        ----------
        country_list : list of str
            values selected in the country drop down
        t_init, t_intro, t_hold, t_relax : int
            length in days of the phases: no measures, measures introduced,
            measures held, measures relaxed
        bmax, bmin : float
            infection rate without / with measures
        gamma : float
            recovery rate

        Returns:
        ----------
        dict with the keys 'data' (one trace of simulated infected per country)
        and 'layout'; dash.no_update while one of the number fields is empty
    '''
    # a cleared or invalid number field is delivered as None: keep the current figure
    if any(value is None for value in (t_init,t_intro,t_hold,t_relax,bmax,bmin,gamma)):
        return dash.no_update

    N0=1000000   # assumed population size, the same for every country
    pd_beta=_beta_profile(t_init,t_intro,t_hold,t_relax,bmax,bmin)

    traces =[]
    for each in country_list or []:

        df_plot=df_input_large[df_input_large['country']==each]

        # NOTE(review): assumes COVID_final_set.csv provides a numeric 'confirmed'
        # column (the relational export written by this notebook has one) -- confirm
        ydata=np.array(df_plot['confirmed'],dtype=float)

        # start from the first reported cases; an initial value of 0 infected never spreads
        ydata=ydata[ydata>0]
        if len(ydata)==0:
            continue
        I0=ydata[0]

        infected=_simulate_infected(I0,N0,pd_beta,gamma)

        # exactly one trace per country, added after the simulation has finished
        traces.append(dict(
                            x=list(range(len(infected))),
                            y=infected,
                            mode='markers+lines',
                            opacity=0.9,
                            name=each
                      )
                 )

    # returned after the loop so that every selected country is part of the figure
    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis={'title':'Simulated infected population',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      }
        )
    }
            
if __name__ == '__main__':
    # start the Dash server in debug mode with the reloader disabled

    app.run_server(debug=True, use_reloader=False)


X:\CVT\CVT\Computer\data_science\eds_covid-19\notebooks
Dash is running on http://127.0.0.1:8050/

 in production, use a production WSGI server like gunicorn instead.

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


In [None]:
       
if __name__ == '__main__':
    # the call has to be indented below the guard, otherwise the cell fails with an IndentationError
    # starts the Dash server in debug mode with the reloader disabled
    app.run_server(debug=True, use_reloader=False)

In [None]:
# NOTE(review): debugging leftover -- country_list is only visible as a parameter of
# update_figure; no module-level definition is visible in this notebook, so this cell
# presumably raises a NameError on a fresh kernel. Consider removing it.
type(country_list)

In [None]:

def _beta_profile(t_init,t_intro,t_hold,t_relax,bmax,bmin):
    '''Infection rate per simulated day: constant bmax, linear decrease to bmin,
       constant bmin, linear increase back to bmax.'''
    # negative durations are treated as 0 so that np.full / np.linspace cannot fail
    n_init,n_intro,n_hold,n_relax=[max(int(days),0) for days in (t_init,t_intro,t_hold,t_relax)]
    return np.concatenate((np.full(n_init,bmax),
                           np.linspace(bmax,bmin,n_intro),
                           np.full(n_hold,bmin),
                           np.linspace(bmin,bmax,n_relax),
                           ))


def _simulate_infected(I0,N0,betas,gamma):
    '''Explicit one-day steps of the SIR equations; returns the infected count after each step.'''
    # The step is computed here with an explicit population size so that the
    # callback does not depend on any module-level N0.
    S=float(N0-I0)
    I=float(I0)
    infected=[]
    for each_beta in betas:
        new_infections=each_beta*S*I/N0
        # recovered people are not tracked, they do not influence the infected curve
        S,I=S-new_infections,I+new_infections-gamma*I
        infected.append(float(I))
    return infected


# NOTE(review): an earlier cell of this notebook already registers a callback for
# Output('main_window_slope', 'figure'); keep only one of the two definitions.
@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('t_initial', 'value'),
    Input('t_intro_measures', 'value'),
    Input('t_hold', 'value'),
    Input('t_relax', 'value'),
    Input('beta_max', 'value'),
    Input('beta_min', 'value'),
    Input('gamma', 'value')])


def update_figure(country_list,t_init,t_intro,t_hold,t_relax,bmax,bmin,gamma):
    ''' Simulate an SIR scenario for every selected country and return the figure

        The first 35 rows of every country are skipped before the initial
        number of infected is read.

        Parameters:
        ----------
        country_list : list of str
            values selected in the country drop down
        t_init, t_intro, t_hold, t_relax : int
            length in days of the phases: no measures, measures introduced,
            measures held, measures relaxed
        bmax, bmin : float
            infection rate without / with measures
        gamma : float
            recovery rate

        Returns:
        ----------
        dict with the keys 'data' (one trace of simulated infected per country)
        and 'layout'; dash.no_update while one of the number fields is empty
    '''
    # a cleared or invalid number field is delivered as None: keep the current figure
    if any(value is None for value in (t_init,t_intro,t_hold,t_relax,bmax,bmin,gamma)):
        return dash.no_update

    N0=1000000   # assumed population size, the same for every country
    pd_beta=_beta_profile(t_init,t_intro,t_hold,t_relax,bmax,bmin)

    traces =[]
    for each in country_list or []:

        df_plot=df_input_large[df_input_large['country']==each]

        # NOTE(review): assumes COVID_final_set.csv provides a numeric 'confirmed'
        # column (the relational export written by this notebook has one) -- confirm.
        # df_plot.each was an attribute access on a column named 'each', not on the country.
        ydata=np.array(df_plot['confirmed'][35:],dtype=float)

        # start from the first reported cases; an initial value of 0 infected never spreads
        ydata=ydata[ydata>0]
        if len(ydata)==0:
            continue
        I0=ydata[0]

        infected=_simulate_infected(I0,N0,pd_beta,gamma)

        # exactly one trace per country, added after the simulation has finished
        traces.append(dict(
                            x=list(range(len(infected))),
                            y=infected,
                            mode='markers+lines',
                            opacity=0.9,
                            name=each
                      )
                 )

    # returned after the loop so that every selected country is part of the figure
    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis={'title':'Simulated infected population',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      }
        )
    }
            


In [None]:
       
# the guard is commented out, so the server is started whenever this cell runs
#if __name__ == '__main__':

app.run_server(debug=True, use_reloader=False)

In [None]:
      
            # NOTE(review): fragment kept at its original indentation; as a stand-alone cell this
            # is an unexpected indent. It reads propagation_rates, ydata, t_initial,
            # t_intro_measures, t_hold and t_relax, none of which is assigned at module level in
            # the visible notebook -- presumably cut out of a simulation loop, confirm before use.
            figure, ax1 = plt.subplots(1, 1)

            # simulated infected curve
            ax1.plot(propagation_rates.index,propagation_rates.infected,label='infected',color='k')

            # cumulative phase lengths give the end position of every phase on the x axis
            t_phases=np.array([t_initial,t_intro_measures,t_hold,t_relax]).cumsum()
            
            # observed values as bars, the phases as shaded spans of increasing opacity
            ax1.bar(np.arange(len(ydata)),ydata, width=0.8,label=' current infected Germany',color='r')
            ax1.axvspan(0,t_phases[0], facecolor='b', alpha=0.2,label='no measures')
            ax1.axvspan(t_phases[0],t_phases[1], facecolor='b', alpha=0.3,label='hard measures introduced')
            ax1.axvspan(t_phases[1],t_phases[2], facecolor='b', alpha=0.4,label='hold measures')
            ax1.axvspan(t_phases[2],t_phases[3], facecolor='b', alpha=0.5,label='relax measures')
            ax1.axvspan(t_phases[3],len(propagation_rates.infected), facecolor='b', alpha=0.6,label='repead hard measures')


            # logarithmic y axis from 10 up to 1.5 times the simulated maximum
            ax1.set_ylim(10, 1.5*max(propagation_rates.infected))
            ax1.set_yscale('log')
            ax1.set_title('Scenario SIR simulations  (demonstration purposes only)',size=16)
            ax1.set_xlabel('time in days',size=16)
            ax1.legend(loc='best',
                       prop={'size': 16});