## Covid-19 Exploratory Data Analysis

### Covid-19 Dataset Understanding

In [1]:
# Optional one-time setup: uncomment to install the third-party plotting
# packages imported below. Inside a notebook, %pip (rather than !pip) installs
# into the environment of the running kernel.
# %pip install folium
# %pip install plotly

In [5]:
import plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import folium
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline
import math
import random
from datetime import datetime,timedelta

import warnings 
warnings.filterwarnings('ignore')

# Colour palette (hex RGB). The names presumably abbreviate
# confirmed / deaths / recovered / active -- confirm against the cells that use them.
cnf = "#393e46"  # dark grey
dth = "#ff2e63"  # red-pink
rec = "#21bf73"  # green
act = "#fe9801"  # orange

### Dataset Preparation

In [6]:
# Initialise Plotly's offline notebook rendering before any figure is shown.
# NOTE(review): per the Plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it in the notebook -- confirm for the installed version.
py.offline.init_notebook_mode(connected=True)

In [19]:
# Directory holding the preprocessed CSV exports, relative to the notebook.
# Forward slashes resolve on Windows, Linux and macOS alike; backslash-separated
# literals are Windows-only and depend on invalid escape sequences such as '\c'
# and '\l' being passed through unchanged, which newer Python versions warn about.
DATA_DIR = '../covid-19/laxmimerit/Covid-19-Preprocessed-Dataset/preprocessed'

# Row-level records; 'Date' is parsed to datetime on load.
df = pd.read_csv(DATA_DIR + '/covid_19_data_cleaned.csv', parse_dates=['Date'])

# Per-country, per-day table; 'Date' is parsed to datetime on load.
country_daywise = pd.read_csv(DATA_DIR + '/country_daywise.csv', parse_dates=['Date'])

# Per-country summary table; loaded without date parsing.
country_wise = pd.read_csv(DATA_DIR + '/countrywise.csv')

# Per-day summary table; 'Date' is parsed to datetime on load.
daywise = pd.read_csv(DATA_DIR + '/daywise.csv', parse_dates=['Date'])

In [11]:
# Column dtypes of the row-level frame as loaded by read_csv
# ('Date' was requested as a datetime via parse_dates).
df.dtypes

Date              datetime64[ns]
Province/State            object
Country                   object
Lat                      float64
Long                     float64
Confirmed                  int64
Recovered                  int64
Deaths                     int64
Active                     int64
dtype: object

In [13]:
# (rows, columns) of the row-level frame.
df.shape

(38216, 9)

In [14]:
# Per-column non-null counts and dtypes, plus the frame's memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38216 entries, 0 to 38215
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            38216 non-null  datetime64[ns]
 1   Province/State  11560 non-null  object        
 2   Country         38216 non-null  object        
 3   Lat             38216 non-null  float64       
 4   Long            38216 non-null  float64       
 5   Confirmed       38216 non-null  int64         
 6   Recovered       38216 non-null  int64         
 7   Deaths          38216 non-null  int64         
 8   Active          38216 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 2.6+ MB


In [12]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()


Date                  0
Province/State    26656
Country               0
Lat                   0
Long                  0
Confirmed             0
Recovered             0
Deaths                0
Active                0
dtype: int64

In [18]:
# Replace missing province/state labels with an empty string,
# then preview the first five rows.
df["Province/State"] = df["Province/State"].fillna(value="")
df.head(5)

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,,Afghanistan,33.0,65.0,0,0,0,0
1,2020-01-23,,Afghanistan,33.0,65.0,0,0,0,0
2,2020-01-24,,Afghanistan,33.0,65.0,0,0,0,0
3,2020-01-25,,Afghanistan,33.0,65.0,0,0,0,0
4,2020-01-26,,Afghanistan,33.0,65.0,0,0,0,0


In [28]:
# Worldwide daily totals: sum each metric over every row sharing a date.
# The metric columns are selected *before* aggregating, so the frame is grouped
# once instead of three times and unrelated columns (Lat/Long and the string
# columns) are never summed. pandas >= 2.0 no longer drops non-numeric columns
# silently in groupby().sum(), so aggregating the whole frame is fragile.
daily_totals = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths']].sum()

# One two-column frame (Date, <metric>) per metric.
confirmed_df = daily_totals['Confirmed'].reset_index()
recovered_df = daily_totals['Recovered'].reset_index()
deaths_df = daily_totals['Deaths'].reset_index()

print(confirmed_df)
print(recovered_df)
print(deaths_df)

          Date  Confirmed
0   2020-01-22        555
1   2020-01-23        654
2   2020-01-24        941
3   2020-01-25       1434
4   2020-01-26       2118
..         ...        ...
131 2020-06-01    6265852
132 2020-06-02    6378237
133 2020-06-03    6508635
134 2020-06-04    6632985
135 2020-06-05    6734088

[136 rows x 2 columns]
          Date  Recovered
0   2020-01-22         28
1   2020-01-23         30
2   2020-01-24         36
3   2020-01-25         39
4   2020-01-26         52
..         ...        ...
131 2020-06-01    2691038
132 2020-06-02    2794375
133 2020-06-03    2874125
134 2020-06-04    2944289
135 2020-06-05    2746192

[136 rows x 2 columns]
          Date  Deaths
0   2020-01-22      17
1   2020-01-23      18
2   2020-01-24      26
3   2020-01-25      42
4   2020-01-26      56
..         ...     ...
131 2020-06-01  375543
132 2020-06-02  380249
133 2020-06-03  385947
134 2020-06-04  391136
135 2020-06-05  394875

[136 rows x 2 columns]


In [30]:
# Re-check missing values per column.
# NOTE(review): the saved output of this cell still lists missing
# Province/State values although an earlier cell fills them; the execution
# counts are out of order, so restart the kernel and run all cells to refresh it.
df.isna().sum()

Date                  0
Province/State    26656
Country               0
Lat                   0
Long                  0
Confirmed             0
Recovered             0
Deaths                0
Active                0
dtype: int64

In [33]:
# Fill missing province/state labels with an empty string and confirm that no
# column has missing values left. fillna is idempotent, so running this after
# an earlier fill is harmless.
df["Province/State"] = df["Province/State"].fillna(value="")
df.isna().sum()

Date              0
Province/State    0
Country           0
Lat               0
Long              0
Confirmed         0
Recovered         0
Deaths            0
Active            0
dtype: int64

In [35]:
# Rows whose Country is exactly "China" (boolean-mask form of the same filter).
df.loc[df["Country"] == "China"]

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
6664,2020-01-22,Anhui,China,31.8257,117.2264,1,0,0,1
6665,2020-01-23,Anhui,China,31.8257,117.2264,9,0,0,9
6666,2020-01-24,Anhui,China,31.8257,117.2264,15,0,0,15
6667,2020-01-25,Anhui,China,31.8257,117.2264,39,0,0,39
6668,2020-01-26,Anhui,China,31.8257,117.2264,60,0,0,60
...,...,...,...,...,...,...,...,...,...
36987,2020-06-01,Yunnan,China,24.9740,101.4870,0,183,0,-183
36988,2020-06-02,Yunnan,China,24.9740,101.4870,0,183,0,-183
36989,2020-06-03,Yunnan,China,24.9740,101.4870,0,183,0,-183
36990,2020-06-04,Yunnan,China,24.9740,101.4870,0,183,0,-183


In [25]:
# Preview the first five rows of the per-country summary table.
country_wise.head(n=5)

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Population,Cases / Million People,Confirmed last week,1 week change,1 week % increase
0,Afghanistan,18969,309,1762,16898,915,1.63,9.29,17.54,38928341,487.0,13659,5310,38.88
1,Albania,1212,33,910,269,15,2.72,75.08,3.63,2877800,421.0,1099,113,10.28
2,Algeria,9935,690,6453,2792,104,6.95,64.95,10.69,43851043,227.0,9134,801,8.77
3,Andorra,852,51,741,60,0,5.99,86.97,6.88,77265,11027.0,764,88,11.52
4,Angola,86,4,21,61,0,4.65,24.42,19.05,32866268,3.0,81,5,6.17


In [23]:
# Preview the first five rows of the per-country, per-day table.
country_daywise.head(n=5)

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Deaths,New Recovered
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0
1,2020-01-22,Albania,0,0,0,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0,0,0,0
4,2020-01-22,Angola,0,0,0,0,0,0,0


In [24]:
# Preview the first five rows of the per-day summary table.
daywise.head(n=5)

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of Countries
0,2020-01-22,555,17,28,510,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,2.75,4.59,60.0,8
2,2020-01-24,941,26,36,879,287,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,2.64,2.46,107.69,13


In [49]:
# Overlay the three worldwide daily series on one Plotly figure.
# Each spec is (frame, value column, legend label, line colour), in draw order;
# the traces are built in a comprehension so no loop variables are left behind
# in the notebook namespace.
fig = go.Figure(
    data=[
        go.Scatter(
            x=frame['Date'],
            y=frame[column],
            mode='lines+markers',
            name=label,
            line=dict(color=colour, width=2),
        )
        for frame, column, label, colour in (
            (confirmed_df, 'Confirmed', 'confirmed cases', "Orange"),
            (recovered_df, 'Recovered', 'recovered cases', "Blue"),
            (deaths_df, 'Deaths', 'deaths cases', "Black"),
        )
    ]
)

# Title, x-axis tick font size and y-axis label.
fig.update_layout(
    title="Worldwide Covid-19 Cases",
    xaxis_tickfont_size=14,
    yaxis=dict(title="Number of Cases"),
)
fig.show()