# Importing Libraries

In [18]:
%matplotlib inline

import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_dark"
from plotly.subplots import make_subplots
import folium 
from folium import plugins
from tqdm.notebook import tqdm as tqdm


import warnings
warnings.filterwarnings('ignore')

In [2]:
# Mount Google Drive inside the Colab runtime at /content/gdrive; the CSV
# files read by the following cells live under '/content/gdrive/My Drive/'.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [3]:
# Read train.csv from the mounted Drive, parsing the 'Date' column into
# datetimes, and preview the first rows.
train_csv_path = '/content/gdrive/My Drive/Covid-19 analysis/train.csv'
data = pd.read_csv(train_csv_path, parse_dates=['Date'])
data.head()

Unnamed: 0,Id,Province/State,Country/Region,Lat,Long,Date,ConfirmedCases,Fatalities
0,1,,Afghanistan,33.0,65.0,2020-01-22,0.0,0.0
1,2,,Afghanistan,33.0,65.0,2020-01-23,0.0,0.0
2,3,,Afghanistan,33.0,65.0,2020-01-24,0.0,0.0
3,4,,Afghanistan,33.0,65.0,2020-01-25,0.0,0.0
4,5,,Afghanistan,33.0,65.0,2020-01-26,0.0,0.0


In [4]:
# Read covid_19_clean_complete.csv from the mounted Drive, parsing the 'Date'
# column into datetimes, and preview the first rows. Every later cell works
# from `cleaned_data`.
clean_csv_path = '/content/gdrive/My Drive/Covid-19 analysis/covid_19_clean_complete.csv'
cleaned_data = pd.read_csv(clean_csv_path, parse_dates=['Date'])
cleaned_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


# Data Processing

In [5]:
# Case-count columns used by the rest of the notebook.
cases = ['Confirmed', 'Deaths', 'Recovered', 'Active']

# Fill missing values FIRST. If 'Active' were derived before filling, a single
# missing input would turn the whole difference into NaN, and the later
# zero-fill would then report 0 active cases instead of treating only the
# missing input as 0.
cleaned_data[['Province/State']] = cleaned_data[['Province/State']].fillna('')
count_inputs = ['Confirmed', 'Deaths', 'Recovered']
cleaned_data[count_inputs] = cleaned_data[count_inputs].fillna(0)

# Active Case = confirmed - deaths - recovered
# (overwrites any 'Active' column that came with the file; NaN-free because
# its inputs were filled above)
cleaned_data['Active'] = cleaned_data['Confirmed'] - cleaned_data['Deaths'] - cleaned_data['Recovered']

cleaned_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


# Confirmed Cases

In [6]:
# Build a dataframe with the total number of confirmed cases for every country
# and plot the 20 highest as a horizontal bar chart.
#
# NOTE(review): the counts appear to be cumulative running totals per date
# (the mortality-rate cell further down also reads only the latest date), so
# the per-country total is taken from the most recent date. Summing over every
# date would add the same cases once per day and inflate the totals.
latest_confirmed = cleaned_data[cleaned_data['Date'] == cleaned_data['Date'].max()]

# Provinces/states of the same country are added together.
confirmiedcases = (
    latest_confirmed.groupby('Country/Region')['Confirmed']
    .sum()
    .reset_index()
)
# 1-based row labels sized from the data rather than a hard-coded country count.
confirmiedcases.index = np.arange(1, len(confirmiedcases) + 1)
global_confirmiedcases = confirmiedcases[['Country/Region','Confirmed']]

# Top 20 by count, reversed so the largest bar is drawn at the top.
top_confirmed = global_confirmiedcases.sort_values('Confirmed', ascending=False).head(20).iloc[::-1]
fig = px.bar(top_confirmed, x='Confirmed', y='Country/Region', title='Confirmed Cases Worldwide',
             text='Confirmed', height=900, orientation='h')
fig.show()

# Death Cases

In [7]:
# Total deaths for every country, plotted as a horizontal bar chart of the
# 20 highest.
#
# NOTE(review): the counts appear to be cumulative running totals per date
# (the mortality-rate cell further down also reads only the latest date), so
# the per-country total is taken from the most recent date instead of being
# summed over every date.
latest_deaths = cleaned_data[cleaned_data['Date'] == cleaned_data['Date'].max()]

# Provinces/states of the same country are added together.
Deathcases = (
    latest_deaths.groupby('Country/Region')['Deaths']
    .sum()
    .reset_index()
)
# The original assigned to a misspelled attribute (`iodex`), so the index was
# never replaced; set real 1-based labels sized from the data.
Deathcases.index = np.arange(1, len(Deathcases) + 1)
global_Deathcases = Deathcases[['Country/Region','Deaths']]

# Top 20 by count, reversed so the largest bar is drawn at the top.
top_deaths = global_Deathcases.sort_values('Deaths', ascending=False).head(20).iloc[::-1]
fig = px.bar(top_deaths, x='Deaths', y='Country/Region', title='Deaths Cases Worldwide',
             text='Deaths', height=900, orientation='h')
fig.show()



# Recovered Cases

In [8]:
# Total recovered cases for every country, plotted as a horizontal bar chart
# of the 20 highest.
#
# NOTE(review): the counts appear to be cumulative running totals per date
# (the mortality-rate cell further down also reads only the latest date), so
# the per-country total is taken from the most recent date instead of being
# summed over every date.
latest_recovered = cleaned_data[cleaned_data['Date'] == cleaned_data['Date'].max()]

# Provinces/states of the same country are added together.
Recoveredcases = (
    latest_recovered.groupby('Country/Region')['Recovered']
    .sum()
    .reset_index()
)
# The original assigned to a misspelled attribute (`iodex`), so the index was
# never replaced; set real 1-based labels sized from the data.
Recoveredcases.index = np.arange(1, len(Recoveredcases) + 1)
global_Recoveredcases = Recoveredcases[['Country/Region','Recovered']]

# Top 20 by count, reversed so the largest bar is drawn at the top.
top_recovered = global_Recoveredcases.sort_values('Recovered', ascending=False).head(20).iloc[::-1]
fig = px.bar(top_recovered, x='Recovered', y='Country/Region', title='Recovered Cases Worldwide',
             text='Recovered', height=900, orientation='h')
fig.show()


# Active Cases

In [9]:
# Active cases for every country, plotted as a horizontal bar chart of the
# 20 highest.
#
# 'Active' is a per-date snapshot (confirmed - deaths - recovered), so adding
# it up across dates has no meaning; the per-country figure is taken from the
# most recent date, as the mortality-rate cell further down does.
latest_active = cleaned_data[cleaned_data['Date'] == cleaned_data['Date'].max()]

# Provinces/states of the same country are added together.
Activecases = (
    latest_active.groupby('Country/Region')['Active']
    .sum()
    .reset_index()
)
# The original assigned to a misspelled attribute (`iodex`), so the index was
# never replaced; set real 1-based labels sized from the data.
Activecases.index = np.arange(1, len(Activecases) + 1)
global_Activecases = Activecases[['Country/Region','Active']]

# Top 20 by count, reversed so the largest bar is drawn at the top.
top_active = global_Activecases.sort_values('Active', ascending=False).head(20).iloc[::-1]
fig = px.bar(top_active, x='Active', y='Country/Region', title='Active Cases Worldwide',
             text='Active', height=900, orientation='h')
fig.show()

# Spread of Corona Virus over time (Worldwide)

In [10]:
# Worldwide totals per date for each case type.
# Columns are selected with a list: the tuple-style form
# groupby(...)['a', 'b'] is rejected by pandas 2.x.
date_c = cleaned_data.groupby('Date')[['Confirmed','Deaths','Recovered','Active']].sum().reset_index()

# One panel per case type, left to right: (column in date_c, line colour).
# The column name doubles as the legend entry and the panel title.
world_panels = [
    ('Confirmed', 'orange'),
    ('Deaths', 'red'),
    ('Recovered', 'green'),
    ('Active', 'blue'),
]

# make_subplots comes from the notebook's import cell.
fig = make_subplots(rows=1, cols=len(world_panels),
                    subplot_titles=[column for column, _ in world_panels])

for panel_col, (column, colour) in enumerate(world_panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=date_c['Date'],
            y=date_c[column],
            name=column,
            line_color=colour,
            mode='lines+markers',
            opacity=0.8),
        row=1, col=panel_col)

fig.update_layout(template="plotly_dark",title_text = '<b>Global Spread of the Coronavirus Over Time </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='white'))
fig.show()

# Spread of Corona Virus over time (China)

In [11]:
def _daily_totals(frame):
    """Sum the four case columns of `frame` per date; returns one row per 'Date'."""
    # List-style column selection (tuple-style is rejected by pandas 2.x); the
    # grouping key 'Date' is not re-selected because reset_index() restores it.
    return frame.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()


# Row-level frames per country (the previous row labels are kept in an 'index'
# column by reset_index()) and their per-date totals. Later cells use the
# Italy, Spain and "rest" frames as well as grouped_china / grouped_rest.
grouped_china = cleaned_data[cleaned_data['Country/Region'] == "China"].reset_index()
grouped_china_date = _daily_totals(grouped_china)

grouped_italy = cleaned_data[cleaned_data['Country/Region'] == "Italy"].reset_index()
grouped_italy_date = _daily_totals(grouped_italy)

grouped_iran = cleaned_data[cleaned_data['Country/Region'] == "Iran"].reset_index()
grouped_iran_date = _daily_totals(grouped_iran)

grouped_korea = cleaned_data[cleaned_data['Country/Region'] == "South Korea"].reset_index()
grouped_korea_date = _daily_totals(grouped_korea)

grouped_spain = cleaned_data[cleaned_data['Country/Region'] == "Spain"].reset_index()
grouped_spain_date = _daily_totals(grouped_spain)

# Everything except the five countries above. 'Iran' must match the spelling
# used in the filter for grouped_iran; the lowercase 'iran' used before never
# matched, so Iran was silently counted in "rest".
grouped_rest = cleaned_data[~cleaned_data['Country/Region'].isin(['China', 'Italy', 'Iran', 'South Korea', 'Spain'])].reset_index()
grouped_rest_date = _daily_totals(grouped_rest)


# China: one panel per case type, left to right: (column, line colour).
china_panels = [
    ('Confirmed', 'orange'),
    ('Deaths', 'red'),
    ('Recovered', 'green'),
    ('Active', 'blue'),
]

fig = make_subplots(rows=1, cols=len(china_panels),
                    subplot_titles=[column for column, _ in china_panels])

for panel_col, (column, colour) in enumerate(china_panels, start=1):
    fig.add_trace(
        go.Scatter(x=grouped_china_date['Date'], y=grouped_china_date[column], name=column,
                   line_color=colour, mode='lines+markers', opacity=0.8),
        row=1, col=panel_col)

fig.update_layout(template="plotly_dark",title_text = '<b>Spread of the Coronavirus Over Time in CHINA (TOP 1)</b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='white'))
fig.show()

# Spread of Corona Virus over time (Italy)

In [12]:
# Italy: per-date totals, one panel per case type.
# Each entry is (column in grouped_italy_date, line colour); the column name
# doubles as the legend entry and the panel title (fixes the misspelled
# "Comfirmed" title).
italy_panels = [
    ('Confirmed', 'orange'),
    ('Deaths', 'red'),
    ('Recovered', 'green'),
    ('Active', 'blue'),
]

fig = make_subplots(rows=1, cols=len(italy_panels),
                    subplot_titles=[column for column, _ in italy_panels])

for panel_col, (column, colour) in enumerate(italy_panels, start=1):
    fig.add_trace(
        go.Scatter(x=grouped_italy_date['Date'], y=grouped_italy_date[column], name=column,
                   line_color=colour, mode='lines+markers', opacity=0.8),
        row=1, col=panel_col)

fig.update_layout(template="plotly_dark",title_text = '<b>Global Spread of the Coronavirus Over Time in ITALY (TOP 2)</b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='white'))
fig.show()

# Spread of Corona Virus over time (Spain)

In [13]:
# Spain: per-date totals, one panel per case type.
# Each entry is (column in grouped_spain_date, line colour); the column name
# doubles as the legend entry and the panel title (fixes the misspelled
# "Comfirmed" title).
spain_panels = [
    ('Confirmed', 'orange'),
    ('Deaths', 'red'),
    ('Recovered', 'green'),
    ('Active', 'blue'),
]

fig = make_subplots(rows=1, cols=len(spain_panels),
                    subplot_titles=[column for column, _ in spain_panels])

for panel_col, (column, colour) in enumerate(spain_panels, start=1):
    fig.add_trace(
        go.Scatter(x=grouped_spain_date['Date'], y=grouped_spain_date[column], name=column,
                   line_color=colour, mode='lines+markers', opacity=0.8),
        row=1, col=panel_col)

fig.update_layout(template="plotly_dark",title_text = '<b>Global Spread of the Coronavirus Over Time in SPAIN (TOP 5)</b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='white'))
fig.show()

# Spread of Corona Virus over time (Rest of the World)

In [14]:
# Rest of the world (grouped_rest_date): per-date totals, one panel per case type.
# Each entry is (column, line colour); the column name doubles as the legend
# entry and the panel title (fixes the misspelled "Comfirmed" title).
rest_panels = [
    ('Confirmed', 'orange'),
    ('Deaths', 'red'),
    ('Recovered', 'green'),
    ('Active', 'blue'),
]

fig = make_subplots(rows=1, cols=len(rest_panels),
                    subplot_titles=[column for column, _ in rest_panels])

for panel_col, (column, colour) in enumerate(rest_panels, start=1):
    fig.add_trace(
        go.Scatter(x=grouped_rest_date['Date'], y=grouped_rest_date[column], name=column,
                   line_color=colour, mode='lines+markers', opacity=0.8),
        row=1, col=panel_col)

fig.update_layout(template="plotly_dark",title_text = '<b>Global Spread of the Coronavirus Over Time in REST OF ALL OTHER </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='white'))
fig.show()

# create map and display it
# NOTE(review): 'Stamen Toner' may not be available as a built-in tileset in
# recent folium releases -- confirm against the installed version.
world_map = folium.Map(location=[10, -20], zoom_start=2.5,tiles='Stamen Toner')

# grouped_rest holds one row per location PER DATE. Plot only the most recent
# date so each location gets a single marker, instead of one stacked marker
# for every day in the dataset.
rest_latest = grouped_rest[grouped_rest['Date'] == grouped_rest['Date'].max()]

for lat, lon, Confirmed, Deaths, Recovered, name in zip(
        rest_latest['Lat'], rest_latest['Long'], rest_latest['Confirmed'],
        rest_latest['Deaths'], rest_latest['Recovered'], rest_latest['Country/Region']):
    # HTML shown when a marker is clicked.
    popup_html = (f'<strong>Country</strong>: {str(name).capitalize()}<br>'
                  f'<strong>Confirmed Cases</strong>: {Confirmed!s}<br>'
                  f'<strong>Recovered Cases</strong>: {Recovered!s}<br>'
                  f'<strong>Deaths Cases</strong>: {Deaths!s}<br>')
    folium.CircleMarker([lat, lon],
                        radius=7,
                        popup=popup_html,
                        color='red',
                        fill_color='red',
                        fill_opacity=0.7).add_to(world_map)
world_map

# Spread of Corona Virus on Map (China)

In [15]:


# create map and display it
# Centre of the initial view.
latitude = 39.91666667
longitude = 116.383333
# NOTE(review): 'Stamen Toner' may not be available as a built-in tileset in
# recent folium releases -- confirm against the installed version.
world_map = folium.Map(location=[latitude, longitude], zoom_start=3.5,tiles='Stamen Toner')

# grouped_china holds one row per location PER DATE. Plot only the most recent
# date so each location gets a single marker, instead of one stacked marker
# for every day in the dataset.
china_latest = grouped_china[grouped_china['Date'] == grouped_china['Date'].max()]

for lat, lon, Confirmed, Deaths, Recovered, name in zip(
        china_latest['Lat'], china_latest['Long'], china_latest['Confirmed'],
        china_latest['Deaths'], china_latest['Recovered'], china_latest['Country/Region']):
    # HTML shown when a marker is clicked.
    popup_html = (f'<strong>Country</strong>: {str(name).capitalize()}<br>'
                  f'<strong>Confirmed Cases</strong>: {Confirmed!s}<br>'
                  f'<strong>Recovered Cases</strong>: {Recovered!s}<br>'
                  f'<strong>Deaths Cases</strong>: {Deaths!s}<br>')
    folium.CircleMarker([lat, lon],
                        radius=10,
                        popup=popup_html,
                        color='red',
                        fill_color='red',
                        fill_opacity=0.7).add_to(world_map)
world_map

Output hidden; open in https://colab.research.google.com to view.

## Mortality and Recovery Rates

In [16]:
# Worldwide totals per date for the three outcome columns, reshaped to long
# form (one row per date and case type) so plotly can stack them by colour.
outcome_columns = ['Recovered', 'Deaths', 'Active']

# List-style column selection: the tuple-style form groupby(...)['a', 'b']
# is rejected by pandas 2.x.
temp = cleaned_data.groupby('Date')[outcome_columns].sum().reset_index()
temp = temp.melt(id_vars="Date", value_vars=outcome_columns,
                 var_name='case', value_name='count')


fig = px.area(temp, x="Date", y="count", color='case',
             title='Cases over time: Area Plot', color_discrete_sequence = ['cyan', 'red', 'orange'])
fig.show()

In [17]:
# Snapshot of the most recent date in the dataset.
cleaned_latest = cleaned_data[cleaned_data['Date'] == cleaned_data['Date'].max()]

# Per-country totals on that date (provinces/states added together).
# List-style column selection: the tuple-style form is rejected by pandas 2.x.
flg = cleaned_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()

# Deaths per 100 confirmed cases, rounded to 2 decimals. Countries with zero
# confirmed cases produce NaN here; they are dropped by the filter below.
flg['mortalityRate'] = ((flg['Deaths'] / flg['Confirmed']) * 100).round(2)

# Keep only countries with more than 100 confirmed cases, highest rate first.
temp = flg[flg['Confirmed']>100]
temp = temp.sort_values('mortalityRate', ascending=False)

# temp is already sorted, so take the top 10 directly and reverse them so the
# highest rate is drawn at the top of the horizontal chart.
fig = px.bar(temp.head(10).iloc[::-1],
             x = 'mortalityRate', y = 'Country/Region',
             title='Deaths per 100 Confirmed Cases', text='mortalityRate', height=800, orientation='h',
             color_discrete_sequence=['darkred']
            )
fig.show()