**The novel coronavirus is bringing countries to a halt and causing widespread health impacts and fatalities. This notebook aims to provide up-to-date COVID-19 analysis and visualizations (cases, deaths & recoveries) across countries at a daily level. In this notebook I take a closer look at the numbers and trends. All data are taken from the Johns Hopkins COVID-19 dataset on GitHub.**

## **Import Packages**

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
import warnings
warnings.filterwarnings("ignore")

## **Reading Data from Johns Hopkins Covid-19 dataset on GitHub**

In [None]:
#one reader shared by all 3 files (confirmed, deaths, recovered)
def get_frame(name):
    """Download one global time-series CSV and return it as a DataFrame.

    `name` is inserted into the file name
    ``time_series_covid19_{name}_global.csv``; the file is read straight from
    the CSSEGISandData/COVID-19 GitHub repository, so a network connection is
    required.
    """
    repo_root = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/'
    csv_path = f'csse_covid_19_time_series/time_series_covid19_{name}_global.csv'
    return pd.read_csv(repo_root + csv_path)


# Download each raw table and immediately discard the coordinate and
# province columns, leaving 'Country/Region' plus the remaining columns.
df_cases = get_frame('confirmed').drop(columns=['Lat', 'Long', 'Province/State'])      # confirmed cases
df_deaths = get_frame('deaths').drop(columns=['Lat', 'Long', 'Province/State'])        # deaths
df_recovered = get_frame('recovered').drop(columns=['Lat', 'Long', 'Province/State'])  # recovered cases

# Preview of the trimmed confirmed-cases table
df_cases.head()

## **Current Trend** 
**Since the numbers are cumulative, we take the most recent date to get the latest totals.**

In [None]:
#current data: column labels of each frame
res_cases=df_cases.columns.to_list()
res_deaths=df_deaths.columns.to_list()
res_recovered=df_recovered.columns.to_list()

#taking data of most recent date
#Each frame is indexed with the last entry of its OWN column list. The three
#files are downloaded separately, so reusing len(res_cases) for the other two
#would pick the wrong column (or raise IndexError) whenever their widths differ.
dftoday_cases=df_cases[['Country/Region',res_cases[-1]]]
dftoday_deaths=df_deaths[['Country/Region',res_deaths[-1]]]
dftoday_recovered=df_recovered[['Country/Region',res_recovered[-1]]]

dftoday_cases.columns=['Country','Total_cases']
dftoday_deaths.columns=['Country','Total_deaths']
dftoday_recovered.columns=['Country','Total_recovered']

#a country can span several rows in the raw frames, so sum them into one row per country
dftoday_cases=dftoday_cases.groupby(['Country'])['Total_cases'].sum().reset_index()
dftoday_deaths=dftoday_deaths.groupby(['Country'])['Total_deaths'].sum().reset_index()
dftoday_recovered=dftoday_recovered.groupby(['Country'])['Total_recovered'].sum().reset_index()

#dftoday has current cases, deaths & recovered
#(default inner merge: only countries present in all three tables are kept)
dftoday=dftoday_cases.merge(dftoday_deaths.merge(dftoday_recovered, on='Country'), on='Country')

#current stats
print('Worldwide cases',':', (dftoday['Total_cases'].sum()/1000000).round(2),'Million')
print('Worldwide deaths',':', (dftoday['Total_deaths'].sum()/1000000).round(2),'Million')
print('Worldwide recovered',':', (dftoday['Total_recovered'].sum()/1000000).round(2),'Million')
print('Mortality Rate(%)',':', (dftoday['Total_deaths'].sum()*100.00/dftoday['Total_cases'].sum()).round(2),'%')

In [None]:
pip install chart_studio

***GEOSPATIAL DATA (WORLDWIDE CASES)***

In [None]:
#Plotting Geospatial data (current cases across countries)
import chart_studio.plotly as py 
import plotly.graph_objs as go #importing graphical objects
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Choropleth trace: countries are matched by name and shaded by total cases.
data = {
    'type': 'choropleth',
    'locations': dftoday['Country'],
    'locationmode': 'country names',
    'colorscale': 'Reds',
    'text': dftoday['Country'],
    'z': dftoday['Total_cases'],
    'colorbar': {'title': "Total_cases"},
}

# Frameless world map in the natural-earth projection.
layout = {
    'title': 'Worldwide cases ',
    'geo': {
        'showframe': False,
        'projection': {'type': 'natural earth'},
    },
}

choromap = go.Figure(data=[data], layout=layout)
iplot(choromap)

***GEOSPATIAL DATA (WORLDWIDE DEATHS)***

In [None]:
#Plotting Geospatial data (current deaths across countries)
import chart_studio.plotly as py 
import plotly.graph_objs as go #importing graphical objects
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Choropleth trace: countries are matched by name and shaded by total deaths.
data = {
    'type': 'choropleth',
    'locations': dftoday['Country'],
    'locationmode': 'country names',
    'colorscale': 'Reds',
    'text': dftoday['Country'],
    'z': dftoday['Total_deaths'],
    'colorbar': {'title': "Total_deaths"},
}

# Frameless world map in the natural-earth projection.
layout = {
    'title': 'Worldwide deaths ',
    'geo': {
        'showframe': False,
        'projection': {'type': 'natural earth'},
    },
}

choromap = go.Figure(data=[data], layout=layout)
iplot(choromap)

***Top 10 worst hit countries & their contribution***

In [None]:
#Top 10 worst hit countries & their contribution (cases as of today)
import plotly.express as px

# Ten countries with the most cases, renumbered 0..9
pie = (
    dftoday
    .sort_values(by='Total_cases', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# Eleventh row: everything not in the top ten, i.e. the overall sum minus the top-ten sum per column
row_10 = ['Others'] + [
    dftoday[col].sum() - pie[col].sum()
    for col in ('Total_cases', 'Total_deaths', 'Total_recovered')
]
pie.loc[10] = row_10

# Donut chart of each slice's share of total cases
fig = px.pie(pie, values='Total_cases', names="Country", title='World COVID-19 Cases')
fig.update_traces(hole=.4, textposition='outside', textinfo='percent+label')
fig.show()

In [None]:
#Top 10 worst hit countries & their contribution (deaths as of today)
# Ten countries with the most deaths, renumbered 0..9
pie = (
    dftoday
    .sort_values(by='Total_deaths', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# Eleventh row: everything not in the top ten, i.e. the overall sum minus the top-ten sum per column
row_10 = ['Others'] + [
    dftoday[col].sum() - pie[col].sum()
    for col in ('Total_cases', 'Total_deaths', 'Total_recovered')
]
pie.loc[10] = row_10

# Donut chart of each slice's share of total deaths
fig = px.pie(pie, values='Total_deaths', names="Country", title='World COVID-19 Deaths')
fig.update_traces(hole=.4, textposition='outside', textinfo='percent+label')
fig.show()

**Mortality rate of the Top 20 worst hit countries**

In [None]:
#Mortality rate (deaths as a percentage of confirmed cases) for the 20 countries with the most cases
import plotly.express as px

bar=dftoday.sort_values(by='Total_cases',ascending=False).head(20).reset_index(drop=True)
bar['Mortality_Rate']=(bar['Total_deaths']*100.00/bar['Total_cases']).round(2)

#order the bars by rate; drop=True keeps the old row numbers from being
#added to the frame as a stray 'index' column
bar=bar.sort_values(by='Mortality_Rate').reset_index(drop=True)

fig = px.bar(bar, x="Mortality_Rate", y="Country",orientation='h',title='Mortality Rate(%)',text='Mortality_Rate')
fig.show()

*Note: The mortality rate here is deaths as a percentage of confirmed cases, so the countries with the highest rate have the most deaths relative to their reported COVID-19 cases, not necessarily the most deaths overall. A high mortality rate could also mean that not enough tests are being done in that country, so many cases go unreported.*

## **Time-series analysis and visualization of Covid Cases & Deaths across Countries**

In [None]:
#creating tables to store day-level data across countries

def _history_with_total(df_wide, value_name):
    """Reshape one wide cumulative table into long day-level rows plus a 'Total' series.

    Parameters
    ----------
    df_wide : DataFrame
        Has a 'Country/Region' column; every other column is treated as a
        date label whose values are counts.
    value_name : str
        Name of the value column in the result ('Cases', 'Deaths', ...).

    Returns
    -------
    (per_country, with_total) : tuple of DataFrame
        per_country has columns Country, Date, <value_name>, one row per
        country and date (rows sharing a country are summed).
        with_total is per_country followed by one row per date with
        Country == 'Total' holding the sum over all countries.
    """
    per_country = (
        df_wide
        .melt(id_vars=['Country/Region'], var_name='Date', value_name=value_name)
        .groupby(['Country/Region', 'Date'])[value_name].sum()
        .reset_index()
        .rename(columns={'Country/Region': 'Country'})
    )
    per_country['Date'] = pd.to_datetime(per_country['Date'])

    #adding Total rows (summation across countries) for each day;
    #groupby on the datetime column already returns them in chronological order
    world = per_country.groupby('Date')[value_name].sum().reset_index()
    world['Country'] = 'Total'

    return per_country, pd.concat([per_country, world])


#historical cases per country
tempc1, dfch = _history_with_total(df_cases, 'Cases')

#historical deaths per country
tempd1, dfdh = _history_with_total(df_deaths, 'Deaths')

#historical recovery per country
tempr1, dfrh = _history_with_total(df_recovered, 'Recovered')

#dfhist has cumulative cases, deaths, recovered for each day & country
#(inner merges: only Country/Date pairs present in all three tables are kept)
dfhist=dfch.merge(dfdh.merge(dfrh, on=['Country','Date']), on=['Country','Date'])
dfhist.head()

In [None]:
pip install bar_chart_race

***Bar Chart race of Covid19 Cases***

In [None]:
#Bar chart race (Covid Cases)
import bar_chart_race as bcr

#work on a renumbered copy so tempc1 itself is not modified by this cell
bart=tempc1.reset_index(drop=True)

#wide table for the animation: one row per date, one column per country,
#0 where a country has no value for a date.
#Rows are ordered by date, not by the counts themselves: sorting on the values
#would shuffle dates wherever a cumulative count was revised downward.
barc = (
    bart.pivot_table(values='Cases', index='Date', columns='Country')
    .fillna(0)
    .sort_index()
)

bcr.bar_chart_race(
    df=barc,
    filename=None,
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.90,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    #the label says "Total", so sum every country column rather than only the largest 39
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total cases: {v.sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5,3.3 ),
    dpi=144,
    cmap='G10',
    title='Worldwide COVID-19 cases by Country' ,
    title_size='',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=True)

In [None]:
#Bar chart race (Covid Deaths)
import bar_chart_race as bcr

#work on a renumbered copy so tempd1 itself is not modified by this cell
bart=tempd1.reset_index(drop=True)

#wide table for the animation: one row per date, one column per country,
#0 where a country has no value for a date.
#Rows are ordered by date, not by the counts themselves: sorting on the values
#would shuffle dates wherever a cumulative count was revised downward.
barc = (
    bart.pivot_table(values='Deaths', index='Date', columns='Country')
    .fillna(0)
    .sort_index()
)

bcr.bar_chart_race(
    df=barc,
    filename=None,
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.90,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    #the label says "Total", so sum every country column rather than only the largest 39
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total Deaths: {v.sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5,3.3 ),
    dpi=144,
    cmap='G10',
    title='Worldwide COVID-19 Deaths by Country' ,
    title_size='',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=True)

In [None]:
#converting cumulative cases, deaths to daily new cases, deaths
#rows must be ordered by date within each country for the differences below
data=dfhist.sort_values(by=['Country','Date']).reset_index(drop=True)

#Daily value = today's cumulative value minus the previous day's, per country.
#diff() leaves NaN on each country's first row; that row takes the cumulative
#value itself. Vectorised on purpose: element-wise chained assignment
#(data[col][i] = ...) is slow and does not write back to the frame when pandas
#Copy-on-Write is active.
data['Daily_cases']=data.groupby('Country')['Cases'].diff().fillna(data['Cases'])
data['Daily_deaths']=data.groupby('Country')['Deaths'].diff().fillna(data['Deaths'])

data.tail()

In [None]:
#Outlier (Turkey on 10th Dec'20)
# Any Daily_cases entry equal to 823225 becomes one tenth of that value.
# The second pair lowers 1494991 by the same amount
# (1494991 - 823225 + 82322.5 = 754088.5) -- presumably the matching
# worldwide 'Total' row; TODO confirm.
data['Daily_cases'] = data['Daily_cases'].replace({
    823225.0: 82322.50,
    1494991.0: 754088.5,
})

In [None]:
pip install mplcursors

***PLOTTING TOTAL WORLDWIDE CASES & DEATHS***

In [None]:
#Total Cases (worldwide)
import mplcursors
import plotly.express as px

# Rows of the worldwide aggregate (Country == 'Total')
dft = data.loc[data['Country'] == 'Total']
x = pd.to_datetime(dft['Date'])
y = dft['Cases']

# Cumulative cases over time, with a single hover label shared per date
fig = (
    px.line(dft, x="Date", y="Cases", title="Total Worldwide Cases")
    .update_traces(mode="markers+lines", hovertemplate=None)
    .update_layout(hovermode="x unified")
)

fig.show()

In [None]:
#Total deaths (worldwide)
import mplcursors
import plotly.express as px

# Rows of the worldwide aggregate (Country == 'Total')
dft = data.loc[data['Country'] == 'Total']
x = pd.to_datetime(dft['Date'])
y = dft['Deaths']

# Cumulative deaths over time, with a single hover label shared per date
fig = (
    px.line(dft, x="Date", y="Deaths", title="Total Worldwide Deaths")
    .update_traces(mode="markers+lines", hovertemplate=None)
    .update_layout(hovermode="x unified")
)

fig.show()

***PLOTTING NEW WORLDWIDE CASES & DEATHS***

In [None]:
#plotting line chart for new confirmed cases worldwide
import plotly.express as px

#rows of the worldwide aggregate (Country == 'Total')
dft=data[data['Country']=='Total']
x=pd.to_datetime(dft['Date'])
y= dft['Daily_cases']

#title spelling corrected ("Worlwide" -> "Worldwide")
fig = px.line(dft, x="Date", y="Daily_cases", title="New Worldwide Cases")
fig.update_traces(mode="markers+lines", hovertemplate=None)
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
#plotting line chart for new deaths worldwide
import plotly.express as px

# Rows of the worldwide aggregate (Country == 'Total')
dft = data.loc[data['Country'] == 'Total']
x = pd.to_datetime(dft['Date'])
y = dft['Daily_deaths']

# Daily new deaths over time, with a single hover label shared per date
fig = (
    px.line(dft, x="Date", y="Daily_deaths", title="New Worldwide Deaths")
    .update_traces(mode="markers+lines", hovertemplate=None)
    .update_layout(hovermode="x unified")
)

fig.show()

***PLOTTING NEW CASES & DEATHS FOR WORST HIT COUNTRIES***

In [None]:
#plotting line chart for new cases at country level
import plotly.express as px

# Hand-picked countries to compare
country = ['US', 'India', 'Brazil', 'Russia', 'United Kingdom', 'Turkey', 'Italy', 'Spain', 'Germany']

# Keep only those countries' rows and renumber them from 0
datac = data[data['Country'].isin(country)].reset_index(drop=True)
x = pd.to_datetime(datac['Date'])
y = datac['Daily_cases']

# One line per country, with a single hover label shared per date
fig = (
    px.line(datac, x="Date", y="Daily_cases", color="Country", title="New Cases (worst hit countries)")
    .update_traces(mode="markers+lines", hovertemplate=None)
    .update_layout(hovermode="x unified")
)
fig.show()

In [None]:
#plotting line chart for new deaths at country level
import plotly.express as px

# Hand-picked countries to compare
country = ['US', 'India', 'Brazil', 'Russia', 'United Kingdom', 'France', 'Turkey', 'Italy', 'Germany']

# Keep only those countries' rows and renumber them from 0
datac = data[data['Country'].isin(country)].reset_index(drop=True)
x = pd.to_datetime(datac['Date'])
y = datac['Daily_deaths']

# One line per country, with a single hover label shared per date
fig = (
    px.line(datac, x="Date", y="Daily_deaths", color="Country", title="New Deaths (worst hit countries)")
    .update_traces(mode="markers+lines", hovertemplate=None)
    .update_layout(hovermode="x unified")
)
fig.show()

***PLOTTING NEW CASES FOR WORST HIT COUNTRIES (SEPARATELY)***

In [None]:
#plotting line chart for new cases at country level
import plotly.express as px

# Hand-picked countries; each one gets its own figure
country = ['US', 'India', 'Brazil', 'Russia', 'United Kingdom', 'Turkey', 'Italy', 'Spain', 'Germany', 'China']

# Keep only those countries' rows and renumber them from 0
datac = data[data['Country'].isin(country)].reset_index(drop=True)
x = pd.to_datetime(datac['Date'])
y = datac['Daily_cases']

for name in country:
    one_country = datac[datac['Country'] == name]
    fig = px.line(one_country, x="Date", y="Daily_cases", color="Country", title="New Cases in" + " " + name)
    fig.update_traces(mode="markers+lines", hovertemplate=None)
    fig.update_layout(hovermode="x unified")
    fig.show()