# COVID-19 DATA ANALYSIS
## Hieu Tran
### MCI - Magic Code Institute
![](https://www.gau.edu.tr/storage//uploads/0/0/0/coronavirus-cdc-1585869759.jpg?vs=1)

In [1]:
# storing and anaysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import matplotlib.dates as dt
import seaborn as sns
sns.set(style="whitegrid")
%matplotlib inline
%matplotlib notebook

# hide warnings
import warnings
warnings.filterwarnings('ignore')

from datetime import timedelta  
from datetime import datetime 

## Data Fields:
- Date: record date
- Country/Region: 
- Province/State: contains NULL
- Lat: Lattitude
- Long: Longitude
- Confirmed: Number of confirmed cases
- Recover: Number of recovered cases
- Deaths: Number of deaths

In [2]:
full_table = pd.read_csv('time-series-19-covid-combined.csv', 
                         parse_dates=['Date'])
full_table.head()

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths
0,2020-01-22,Afghanistan,,33.0,65.0,0.0,0.0,0.0
1,2020-01-23,Afghanistan,,33.0,65.0,0.0,0.0,0.0
2,2020-01-24,Afghanistan,,33.0,65.0,0.0,0.0,0.0
3,2020-01-25,Afghanistan,,33.0,65.0,0.0,0.0,0.0
4,2020-01-26,Afghanistan,,33.0,65.0,0.0,0.0,0.0


In [3]:
# cases 
cases = ['Confirmed', 'Deaths', 'Recovered', 'Active']

### Add 1 more data field
Active = Confirmed - Deaths - Recovered

In [4]:
# Active Case = confirmed - deaths - recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']
full_table

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
1,2020-01-23,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
2,2020-01-24,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
3,2020-01-25,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
4,2020-01-26,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
34705,2020-05-26,Zimbabwe,,-20.0,30.0,56.0,25.0,4.0,27.0
34706,2020-05-27,Zimbabwe,,-20.0,30.0,132.0,25.0,4.0,103.0
34707,2020-05-28,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0
34708,2020-05-29,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0


### How many countries/regions COVID-19 has spread?

In [5]:
print(full_table['Country/Region'].unique().shape)
for item in full_table['Country/Region'].unique():
    print(item)

(188,)
Afghanistan
Albania
Algeria
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Bolivia
Bosnia and Herzegovina
Botswana
Brazil
Brunei
Bulgaria
Burkina Faso
Burma
Burundi
Cabo Verde
Cambodia
Cameroon
Canada
Central African Republic
Chad
Chile
China
Colombia
Comoros
Congo (Brazzaville)
Congo (Kinshasa)
Costa Rica
Cote d'Ivoire
Croatia
Cuba
Cyprus
Czechia
Denmark
Diamond Princess
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Fiji
Finland
France
Gabon
Gambia
Georgia
Germany
Ghana
Greece
Grenada
Guatemala
Guinea-Bissau
Guinea
Guyana
Haiti
Holy See
Honduras
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Korea, South
Kosovo
Kuwait
Kyrgyzstan
Laos
Latvia
Lebanon
Lesotho
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
MS Zaandam
Madagascar
Malawi
Malaysia
Maldives
M

In [6]:
# replacing Mainland china with just China
full_table['Country/Region'] = full_table['Country/Region'].replace('Mainland China', 'China')

In [7]:
# filling missing values 
full_table[['Province/State']] = full_table[['Province/State']].fillna('')
full_table[cases] = full_table[cases].fillna(0)
full_table

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
1,2020-01-23,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
2,2020-01-24,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
3,2020-01-25,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
4,2020-01-26,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
34705,2020-05-26,Zimbabwe,,-20.0,30.0,56.0,25.0,4.0,27.0
34706,2020-05-27,Zimbabwe,,-20.0,30.0,132.0,25.0,4.0,103.0
34707,2020-05-28,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0
34708,2020-05-29,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0


### Cases in the ships

In [8]:
# cases in the ships
ship = full_table[full_table['Province/State'].str.contains('Grand Princess')|full_table['Province/State'].str.contains('Diamond Princess cruise ship')]
ship

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
5460,2020-01-22,Canada,Grand Princess,37.6489,-122.6655,0.0,0.0,0.0,0.0
5461,2020-01-23,Canada,Grand Princess,37.6489,-122.6655,0.0,0.0,0.0,0.0
5462,2020-01-24,Canada,Grand Princess,37.6489,-122.6655,0.0,0.0,0.0,0.0
5463,2020-01-25,Canada,Grand Princess,37.6489,-122.6655,0.0,0.0,0.0,0.0
5464,2020-01-26,Canada,Grand Princess,37.6489,-122.6655,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
5585,2020-05-26,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5586,2020-05-27,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5587,2020-05-28,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5588,2020-05-29,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0


### More cases in the ships

In [9]:
full_table[full_table['Province/State'].str.contains('Princess')]

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
5330,2020-01-22,Canada,Diamond Princess,0.0000,0.0000,0.0,0.0,0.0,0.0
5331,2020-01-23,Canada,Diamond Princess,0.0000,0.0000,0.0,0.0,0.0,0.0
5332,2020-01-24,Canada,Diamond Princess,0.0000,0.0000,0.0,0.0,0.0,0.0
5333,2020-01-25,Canada,Diamond Princess,0.0000,0.0000,0.0,0.0,0.0,0.0
5334,2020-01-26,Canada,Diamond Princess,0.0000,0.0000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
5585,2020-05-26,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5586,2020-05-27,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5587,2020-05-28,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0
5588,2020-05-29,Canada,Grand Princess,37.6489,-122.6655,13.0,0.0,0.0,0.0


### Split to "china" table and "row" table

In [10]:
# china and the row
china = full_table[full_table['Country/Region']=='China']
row = full_table[full_table['Country/Region']!='China']

In [11]:
china

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
7410,2020-01-22,China,Anhui,31.8257,117.2264,1.0,0.0,0.0,1.0
7411,2020-01-23,China,Anhui,31.8257,117.2264,9.0,0.0,0.0,9.0
7412,2020-01-24,China,Anhui,31.8257,117.2264,15.0,0.0,0.0,15.0
7413,2020-01-25,China,Anhui,31.8257,117.2264,39.0,0.0,0.0,39.0
7414,2020-01-26,China,Anhui,31.8257,117.2264,60.0,0.0,0.0,60.0
...,...,...,...,...,...,...,...,...,...
11695,2020-05-26,China,Zhejiang,29.1832,120.0934,1268.0,1267.0,1.0,0.0
11696,2020-05-27,China,Zhejiang,29.1832,120.0934,1268.0,1267.0,1.0,0.0
11697,2020-05-28,China,Zhejiang,29.1832,120.0934,1268.0,1267.0,1.0,0.0
11698,2020-05-29,China,Zhejiang,29.1832,120.0934,1268.0,1267.0,1.0,0.0


In [12]:
row

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
1,2020-01-23,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
2,2020-01-24,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
3,2020-01-25,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
4,2020-01-26,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
34705,2020-05-26,Zimbabwe,,-20.0,30.0,56.0,25.0,4.0,27.0
34706,2020-05-27,Zimbabwe,,-20.0,30.0,132.0,25.0,4.0,103.0
34707,2020-05-28,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0
34708,2020-05-29,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0


In [13]:
# latest
full_latest = full_table[full_table['Date'] == max(full_table['Date'])].reset_index()
china_latest = full_latest[full_latest['Country/Region']=='China']
row_latest = full_latest[full_latest['Country/Region']!='China']

In [14]:
full_latest

Unnamed: 0,index,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
0,129,2020-05-30,Afghanistan,,33.000000,65.000000,14525.0,1303.0,249.0,12973.0
1,259,2020-05-30,Albania,,41.153300,20.168300,1122.0,857.0,33.0,232.0
2,389,2020-05-30,Algeria,,28.033900,1.659600,9267.0,5549.0,646.0,3072.0
3,519,2020-05-30,Andorra,,42.506300,1.521800,764.0,692.0,51.0,21.0
4,649,2020-05-30,Angola,,-11.202700,17.873900,84.0,18.0,4.0,62.0
...,...,...,...,...,...,...,...,...,...,...
262,34189,2020-05-30,West Bank and Gaza,,31.952200,35.233200,447.0,368.0,3.0,76.0
263,34319,2020-05-30,Western Sahara,,24.215500,-12.885800,9.0,6.0,1.0,2.0
264,34449,2020-05-30,Yemen,,15.552727,48.516388,310.0,13.0,77.0,220.0
265,34579,2020-05-30,Zambia,,-15.416700,28.283300,1057.0,779.0,7.0,271.0


In [15]:
china_latest

Unnamed: 0,index,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
57,7539,2020-05-30,China,Anhui,31.8257,117.2264,991.0,985.0,6.0,0.0
58,7669,2020-05-30,China,Beijing,40.1824,116.4142,593.0,581.0,9.0,3.0
59,7799,2020-05-30,China,Chongqing,30.0572,107.874,579.0,573.0,6.0,0.0
60,7929,2020-05-30,China,Fujian,26.0789,117.9874,358.0,355.0,1.0,2.0
61,8059,2020-05-30,China,Gansu,37.8099,101.0583,139.0,137.0,2.0,0.0
62,8189,2020-05-30,China,Guangdong,23.3417,113.4244,1593.0,1583.0,8.0,2.0
63,8319,2020-05-30,China,Guangxi,23.8298,108.7881,254.0,252.0,2.0,0.0
64,8449,2020-05-30,China,Guizhou,26.8154,106.8748,147.0,145.0,2.0,0.0
65,8579,2020-05-30,China,Hainan,19.1959,109.7453,169.0,163.0,6.0,0.0
66,8709,2020-05-30,China,Hebei,39.549,116.1306,328.0,322.0,6.0,0.0


In [16]:
row_latest

Unnamed: 0,index,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active
0,129,2020-05-30,Afghanistan,,33.000000,65.000000,14525.0,1303.0,249.0,12973.0
1,259,2020-05-30,Albania,,41.153300,20.168300,1122.0,857.0,33.0,232.0
2,389,2020-05-30,Algeria,,28.033900,1.659600,9267.0,5549.0,646.0,3072.0
3,519,2020-05-30,Andorra,,42.506300,1.521800,764.0,692.0,51.0,21.0
4,649,2020-05-30,Angola,,-11.202700,17.873900,84.0,18.0,4.0,62.0
...,...,...,...,...,...,...,...,...,...,...
262,34189,2020-05-30,West Bank and Gaza,,31.952200,35.233200,447.0,368.0,3.0,76.0
263,34319,2020-05-30,Western Sahara,,24.215500,-12.885800,9.0,6.0,1.0,2.0
264,34449,2020-05-30,Yemen,,15.552727,48.516388,310.0,13.0,77.0,220.0
265,34579,2020-05-30,Zambia,,-15.416700,28.283300,1057.0,779.0,7.0,271.0


In [17]:
# latest condensed
full_latest_grouped = full_latest.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
china_latest_grouped = china_latest.groupby('Province/State')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
row_latest_grouped = row_latest.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()

In [18]:
full_latest_grouped

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,Afghanistan,14525.0,249.0,1303.0,12973.0
1,Albania,1122.0,33.0,857.0,232.0
2,Algeria,9267.0,646.0,5549.0,3072.0
3,Andorra,764.0,51.0,692.0,21.0
4,Angola,84.0,4.0,18.0,62.0
...,...,...,...,...,...
183,West Bank and Gaza,447.0,3.0,368.0,76.0
184,Western Sahara,9.0,1.0,6.0,2.0
185,Yemen,310.0,77.0,13.0,220.0
186,Zambia,1057.0,7.0,779.0,271.0


In [19]:
china_latest_grouped

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active
0,Anhui,991.0,6.0,985.0,0.0
1,Beijing,593.0,9.0,581.0,3.0
2,Chongqing,579.0,6.0,573.0,0.0
3,Fujian,358.0,1.0,355.0,2.0
4,Gansu,139.0,2.0,137.0,0.0
5,Guangdong,1593.0,8.0,1583.0,2.0
6,Guangxi,254.0,2.0,252.0,0.0
7,Guizhou,147.0,2.0,145.0,0.0
8,Hainan,169.0,6.0,163.0,0.0
9,Hebei,328.0,6.0,322.0,0.0


In [20]:
row_latest_grouped

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,Afghanistan,14525.0,249.0,1303.0,12973.0
1,Albania,1122.0,33.0,857.0,232.0
2,Algeria,9267.0,646.0,5549.0,3072.0
3,Andorra,764.0,51.0,692.0,21.0
4,Angola,84.0,4.0,18.0,62.0
...,...,...,...,...,...
182,West Bank and Gaza,447.0,3.0,368.0,76.0
183,Western Sahara,9.0,1.0,6.0,2.0
184,Yemen,310.0,77.0,13.0,220.0
185,Zambia,1057.0,7.0,779.0,271.0


# World-Wide Totals

In [21]:
temp = full_table.groupby('Date')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
temp = temp[temp['Date']==max(temp['Date'])].reset_index(drop=True)
#temp.style.background_gradient(cmap='Pastel1')
temp

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
0,2020-05-30,6059017.0,369126.0,2564693.0,3089193.0


# Cummulative Outcomes

In [22]:
temp = full_table.groupby('Date')['Recovered', 'Deaths', 'Active'].sum().reset_index()
temp

Unnamed: 0,Date,Recovered,Deaths,Active
0,2020-01-22,28.0,17.0,510.0
1,2020-01-23,30.0,18.0,606.0
2,2020-01-24,36.0,26.0,879.0
3,2020-01-25,39.0,42.0,1353.0
4,2020-01-26,52.0,56.0,2009.0
...,...,...,...,...
125,2020-05-26,2286956.0,350452.0,2916233.0
126,2020-05-27,2350088.0,355628.0,2950209.0
127,2020-05-28,2415960.0,360308.0,2996645.0
128,2020-05-29,2493535.0,364867.0,3029932.0


In [23]:
temp = temp.melt(id_vars="Date", value_vars=['Recovered', 'Deaths', 'Active'],
                 var_name='Case', value_name='Count')
temp

Unnamed: 0,Date,Case,Count
0,2020-01-22,Recovered,28.0
1,2020-01-23,Recovered,30.0
2,2020-01-24,Recovered,36.0
3,2020-01-25,Recovered,39.0
4,2020-01-26,Recovered,52.0
...,...,...,...
385,2020-05-26,Active,2916233.0
386,2020-05-27,Active,2950209.0
387,2020-05-28,Active,2996645.0
388,2020-05-29,Active,3029932.0


In [24]:
fig = plt.figure()
ax = sns.lineplot(x="Date", y="Count", hue="Case", style="Case", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

# Recovery and Mortality Rate

In [25]:
temp = full_table.groupby('Date').sum().reset_index()
temp

Unnamed: 0,Date,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,5681.509963,5888.931462,555.0,28.0,17.0,510.0
1,2020-01-23,5681.509963,5888.931462,654.0,30.0,18.0,606.0
2,2020-01-24,5681.509963,5888.931462,941.0,36.0,26.0,879.0
3,2020-01-25,5681.509963,5888.931462,1434.0,39.0,42.0,1353.0
4,2020-01-26,5681.509963,5888.931462,2118.0,52.0,56.0,2009.0
...,...,...,...,...,...,...,...
125,2020-05-26,5681.509963,5888.931462,5589626.0,2286956.0,350452.0,2916233.0
126,2020-05-27,5681.509963,5888.931462,5691790.0,2350088.0,355628.0,2950209.0
127,2020-05-28,5681.509963,5888.931462,5808946.0,2415960.0,360308.0,2996645.0
128,2020-05-29,5681.509963,5888.931462,5924275.0,2493535.0,364867.0,3029932.0


In [26]:
temp['No. of Deaths to 100 Confirmed Cases'] = round(temp['Deaths']/temp['Confirmed'], 3)*100
temp['No. of Recovered to 100 Confirmed Cases'] = round(temp['Recovered']/temp['Confirmed'], 3)*100
temp

Unnamed: 0,Date,Lat,Long,Confirmed,Recovered,Deaths,Active,No. of Deaths to 100 Confirmed Cases,No. of Recovered to 100 Confirmed Cases
0,2020-01-22,5681.509963,5888.931462,555.0,28.0,17.0,510.0,3.1,5.0
1,2020-01-23,5681.509963,5888.931462,654.0,30.0,18.0,606.0,2.8,4.6
2,2020-01-24,5681.509963,5888.931462,941.0,36.0,26.0,879.0,2.8,3.8
3,2020-01-25,5681.509963,5888.931462,1434.0,39.0,42.0,1353.0,2.9,2.7
4,2020-01-26,5681.509963,5888.931462,2118.0,52.0,56.0,2009.0,2.6,2.5
...,...,...,...,...,...,...,...,...,...
125,2020-05-26,5681.509963,5888.931462,5589626.0,2286956.0,350452.0,2916233.0,6.3,40.9
126,2020-05-27,5681.509963,5888.931462,5691790.0,2350088.0,355628.0,2950209.0,6.2,41.3
127,2020-05-28,5681.509963,5888.931462,5808946.0,2415960.0,360308.0,2996645.0,6.2,41.6
128,2020-05-29,5681.509963,5888.931462,5924275.0,2493535.0,364867.0,3029932.0,6.2,42.1


In [27]:
temp = temp.melt(id_vars='Date', 
                 value_vars=['No. of Deaths to 100 Confirmed Cases',
                             'No. of Recovered to 100 Confirmed Cases'], 
                 var_name='Ratio', value_name='Value')

temp

Unnamed: 0,Date,Ratio,Value
0,2020-01-22,No. of Deaths to 100 Confirmed Cases,3.1
1,2020-01-23,No. of Deaths to 100 Confirmed Cases,2.8
2,2020-01-24,No. of Deaths to 100 Confirmed Cases,2.8
3,2020-01-25,No. of Deaths to 100 Confirmed Cases,2.9
4,2020-01-26,No. of Deaths to 100 Confirmed Cases,2.6
...,...,...,...
255,2020-05-26,No. of Recovered to 100 Confirmed Cases,40.9
256,2020-05-27,No. of Recovered to 100 Confirmed Cases,41.3
257,2020-05-28,No. of Recovered to 100 Confirmed Cases,41.6
258,2020-05-29,No. of Recovered to 100 Confirmed Cases,42.1


In [28]:
fig = plt.figure()
ax = sns.lineplot(x="Date", y="Value", hue="Ratio", style="Ratio", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

# No. of Places To Which COVID-19 spread

In [29]:
c_spread = china[china['Confirmed']!=0].groupby('Date')['Province/State'].unique().apply(len)
c_spread = pd.DataFrame(c_spread).reset_index()
c_spread

Unnamed: 0,Date,Province/State
0,2020-01-22,24
1,2020-01-23,30
2,2020-01-24,31
3,2020-01-25,32
4,2020-01-26,32
...,...,...
125,2020-05-26,33
126,2020-05-27,33
127,2020-05-28,33
128,2020-05-29,33


In [30]:
fig = plt.figure()
ax = sns.lineplot(x="Date", y="Province/State", data=c_spread)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

In [31]:
spread = full_table[full_table['Confirmed']!=0].groupby('Date')['Country/Region'].unique().apply(len)
spread = pd.DataFrame(spread).reset_index()
spread

Unnamed: 0,Date,Country/Region
0,2020-01-22,6
1,2020-01-23,8
2,2020-01-24,9
3,2020-01-25,11
4,2020-01-26,13
...,...,...
125,2020-05-26,188
126,2020-05-27,188
127,2020-05-28,188
128,2020-05-29,188


In [32]:
fig = plt.figure()
ax = sns.lineplot(x="Date", y="Country/Region", data=spread)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

# Top 20 Countries

In [33]:
flg = full_latest_grouped
flg

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,Afghanistan,14525.0,249.0,1303.0,12973.0
1,Albania,1122.0,33.0,857.0,232.0
2,Algeria,9267.0,646.0,5549.0,3072.0
3,Andorra,764.0,51.0,692.0,21.0
4,Angola,84.0,4.0,18.0,62.0
...,...,...,...,...,...
183,West Bank and Gaza,447.0,3.0,368.0,76.0
184,Western Sahara,9.0,1.0,6.0,2.0
185,Yemen,310.0,77.0,13.0,220.0
186,Zambia,1057.0,7.0,779.0,271.0


In [34]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.barplot(x="Confirmed", y="Country/Region", 
                 data=flg.sort_values(by=['Confirmed'], 
                                      ascending=False).head(20))

<IPython.core.display.Javascript object>

In [35]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.barplot(x="Deaths", y="Country/Region", 
                 data=flg.sort_values(by=['Deaths'], 
                                      ascending=False).head(20))

<IPython.core.display.Javascript object>

In [36]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.barplot(x="Recovered", y="Country/Region", 
                 data=flg.sort_values(by=['Recovered'], 
                                      ascending=False).head(20))

<IPython.core.display.Javascript object>

In [37]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.barplot(x="Active", y="Country/Region", 
                 data=flg.sort_values(by=['Active'], 
                                      ascending=False).head(20))

<IPython.core.display.Javascript object>

# No. of Deaths per 100 Confirmed Cases

In [38]:
flg['Mortality Rate'] = round((flg['Deaths']/flg['Confirmed'])*100, 2)
flg

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,Mortality Rate
0,Afghanistan,14525.0,249.0,1303.0,12973.0,1.71
1,Albania,1122.0,33.0,857.0,232.0,2.94
2,Algeria,9267.0,646.0,5549.0,3072.0,6.97
3,Andorra,764.0,51.0,692.0,21.0,6.68
4,Angola,84.0,4.0,18.0,62.0,4.76
...,...,...,...,...,...,...
183,West Bank and Gaza,447.0,3.0,368.0,76.0,0.67
184,Western Sahara,9.0,1.0,6.0,2.0,11.11
185,Yemen,310.0,77.0,13.0,220.0,24.84
186,Zambia,1057.0,7.0,779.0,271.0,0.66


In [39]:
# (Only countries with more than 100 case are considered)
temp = flg[flg['Confirmed']>100]
temp = temp.sort_values('Mortality Rate', ascending=False)
temp

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,Mortality Rate
185,Yemen,310.0,77.0,13.0,220.0,24.84
16,Belgium,58186.0,9453.0,15769.0,32964.0,16.25
62,France,188752.0,28774.0,68386.0,91592.0,15.24
85,Italy,232664.0,33340.0,155633.0,43691.0,14.33
178,United Kingdom,274219.0,38458.0,1187.0,234574.0,14.02
...,...,...,...,...,...,...
152,Singapore,34366.0,23.0,20727.0,13616.0,0.07
30,Cambodia,125.0,0.0,123.0,2.0,0.00
175,Uganda,413.0,0.0,72.0,341.0,0.00
182,Vietnam,328.0,0.0,279.0,49.0,0.00


In [40]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.barplot(x="Mortality Rate", y="Country/Region", data=temp.head(20))

<IPython.core.display.Javascript object>

# Epidemic Span
#### Note : In the graph, last day is shown as one day after the last time a new confirmed cases reported in the Country / Region

In [41]:
# first date
# ----------
first_date = full_table[full_table['Confirmed']>0]
first_date = first_date.groupby('Country/Region')['Date'].agg(['min']).reset_index()
first_date

Unnamed: 0,Country/Region,min
0,Afghanistan,2020-02-24
1,Albania,2020-03-09
2,Algeria,2020-02-25
3,Andorra,2020-03-02
4,Angola,2020-03-20
...,...,...
183,West Bank and Gaza,2020-03-05
184,Western Sahara,2020-04-05
185,Yemen,2020-04-10
186,Zambia,2020-03-18


In [42]:
# last date
# ---------
last_date = full_table.groupby(['Country/Region', 'Date', ])['Confirmed', 'Deaths', 'Recovered']
last_date = last_date.sum().diff().reset_index() 
#diff(): Calculates the difference of a DataFrame element compared with another element in the DataFrame
#(default is the element in the same column of the previous row).
last_date

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered
0,Afghanistan,2020-01-22,,,
1,Afghanistan,2020-01-23,0.0,0.0,0.0
2,Afghanistan,2020-01-24,0.0,0.0,0.0
3,Afghanistan,2020-01-25,0.0,0.0,0.0
4,Afghanistan,2020-01-26,0.0,0.0,0.0
...,...,...,...,...,...
24435,Zimbabwe,2020-05-26,0.0,0.0,0.0
24436,Zimbabwe,2020-05-27,76.0,0.0,0.0
24437,Zimbabwe,2020-05-28,17.0,0.0,3.0
24438,Zimbabwe,2020-05-29,0.0,0.0,0.0


In [43]:
mask = last_date['Country/Region'] != last_date['Country/Region'].shift(1)
mask

0         True
1        False
2        False
3        False
4        False
         ...  
24435    False
24436    False
24437    False
24438    False
24439    False
Name: Country/Region, Length: 24440, dtype: bool

In [44]:
last_date.loc[mask, 'Confirmed'] = np.nan
last_date.loc[mask, 'Deaths'] = np.nan
last_date.loc[mask, 'Recovered'] = np.nan
last_date

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered
0,Afghanistan,2020-01-22,,,
1,Afghanistan,2020-01-23,0.0,0.0,0.0
2,Afghanistan,2020-01-24,0.0,0.0,0.0
3,Afghanistan,2020-01-25,0.0,0.0,0.0
4,Afghanistan,2020-01-26,0.0,0.0,0.0
...,...,...,...,...,...
24435,Zimbabwe,2020-05-26,0.0,0.0,0.0
24436,Zimbabwe,2020-05-27,76.0,0.0,0.0
24437,Zimbabwe,2020-05-28,17.0,0.0,3.0
24438,Zimbabwe,2020-05-29,0.0,0.0,0.0


In [45]:
last_date = last_date[last_date['Confirmed']>0]
last_date = last_date.groupby('Country/Region')['Date'].agg(['max']).reset_index()
last_date

Unnamed: 0,Country/Region,max
0,Afghanistan,2020-05-30
1,Albania,2020-05-30
2,Algeria,2020-05-30
3,Andorra,2020-05-29
4,Angola,2020-05-30
...,...,...
183,West Bank and Gaza,2020-05-30
184,Western Sahara,2020-05-24
185,Yemen,2020-05-30
186,Zambia,2020-05-27


In [46]:
first_last = pd.concat([first_date, last_date[['max']]], axis=1)
first_last

Unnamed: 0,Country/Region,min,max
0,Afghanistan,2020-02-24,2020-05-30
1,Albania,2020-03-09,2020-05-30
2,Algeria,2020-02-25,2020-05-30
3,Andorra,2020-03-02,2020-05-29
4,Angola,2020-03-20,2020-05-30
...,...,...,...
183,West Bank and Gaza,2020-03-05,2020-05-30
184,Western Sahara,2020-04-05,2020-05-24
185,Yemen,2020-04-10,2020-05-30
186,Zambia,2020-03-18,2020-05-27


In [47]:
# added 1 more day, which will show the next day as the day on which last case appeared
first_last['max'] = first_last['max'] + timedelta(days=1)

In [48]:
# no. of days
first_last['Days'] = first_last['max'] - first_last['min']

In [49]:
# task column as country
first_last['Task'] = first_last['Country/Region']

In [50]:
# rename columns
first_last.columns = ['Country/Region', 'Start', 'Finish', 'Days', 'Task']

In [51]:
# sort by no. of days
first_last = first_last.sort_values('Days')
first_last.reset_index(drop=True)
first_last

Unnamed: 0,Country/Region,Start,Finish,Days,Task
104,MS Zaandam,2020-03-28,2020-04-02,5 days,MS Zaandam
98,Lesotho,2020-05-13,2020-05-23,10 days,Lesotho
50,Dominica,2020-03-22,2020-04-11,20 days,Dominica
95,Laos,2020-03-24,2020-04-13,20 days,Laos
17,Belize,2020-03-23,2020-04-14,22 days,Belize
...,...,...,...,...,...
168,Thailand,2020-01-22,2020-05-31,130 days,Thailand
36,China,2020-01-22,2020-05-31,130 days,China
87,Japan,2020-01-22,2020-05-31,130 days,Japan
91,"Korea, South",2020-01-22,2020-05-31,130 days,"Korea, South"


In [52]:
fig, ax = plt.subplots()
ax.xaxis.set_major_formatter(plt.NullFormatter())
#ax.xaxis.set_major_locator(plt.MaxNLocator(10))

i = 0
ax = ax.xaxis_date()

colorlist = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
for index, irow in first_last.iterrows():
    i = i + 1
    plt.hlines(irow['Task'], 
               dt.date2num(irow['Start']), 
               dt.date2num(irow['Finish']), 
               #color=colorlist[i%len(colorlist)], 
               linewidth=7)

<IPython.core.display.Javascript object>

# China vs. Not China

In [53]:
# In China
temp = china.groupby('Date')['Confirmed', 'Deaths', 'Recovered'].sum().diff()
temp = temp.reset_index()
temp

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-01-22,,,
1,2020-01-23,95.0,1.0,2.0
2,2020-01-24,277.0,8.0,6.0
3,2020-01-25,486.0,16.0,3.0
4,2020-01-26,669.0,14.0,10.0
...,...,...,...,...
125,2020-05-26,1.0,0.0,6.0
126,2020-05-27,3.0,0.0,9.0
127,2020-05-28,0.0,0.0,4.0
128,2020-05-29,17.0,0.0,11.0


In [54]:
temp = temp.melt(id_vars="Date",
                 value_vars=['Confirmed', 'Deaths', 'Recovered'])
temp

Unnamed: 0,Date,variable,value
0,2020-01-22,Confirmed,
1,2020-01-23,Confirmed,95.0
2,2020-01-24,Confirmed,277.0
3,2020-01-25,Confirmed,486.0
4,2020-01-26,Confirmed,669.0
...,...,...,...
385,2020-05-26,Recovered,6.0
386,2020-05-27,Recovered,9.0
387,2020-05-28,Recovered,4.0
388,2020-05-29,Recovered,11.0


In [55]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.lineplot(x="Date", y="value", hue="variable", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

In [56]:
#Outside China
temp = row.groupby('Date')['Confirmed', 'Deaths', 'Recovered'].sum().diff()
temp = temp.reset_index()
temp

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-01-22,,,
1,2020-01-23,4.0,0.0,0.0
2,2020-01-24,10.0,0.0,0.0
3,2020-01-25,7.0,0.0,0.0
4,2020-01-26,15.0,0.0,3.0
...,...,...,...,...
125,2020-05-26,94564.0,4221.0,55212.0
126,2020-05-27,102161.0,5176.0,63123.0
127,2020-05-28,117156.0,4680.0,65868.0
128,2020-05-29,115312.0,4559.0,77564.0


In [57]:
temp = temp.melt(id_vars="Date", 
                 value_vars=['Confirmed', 'Deaths', 'Recovered'])
temp

Unnamed: 0,Date,variable,value
0,2020-01-22,Confirmed,
1,2020-01-23,Confirmed,4.0
2,2020-01-24,Confirmed,10.0
3,2020-01-25,Confirmed,7.0
4,2020-01-26,Confirmed,15.0
...,...,...,...
385,2020-05-26,Recovered,55212.0
386,2020-05-27,Recovered,63123.0
387,2020-05-28,Recovered,65868.0
388,2020-05-29,Recovered,77564.0


In [58]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.lineplot(x="Date", y="value", hue="variable", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

In [59]:
def from_china_or_not(row):
    if row['Country/Region']=='China':
        return 'From China'
    else:
        return 'Outside China'
    
temp = full_table.copy()
temp['Region'] = temp.apply(from_china_or_not, axis=1)
temp

Unnamed: 0,Date,Country/Region,Province/State,Lat,Long,Confirmed,Recovered,Deaths,Active,Region
0,2020-01-22,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0,Outside China
1,2020-01-23,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0,Outside China
2,2020-01-24,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0,Outside China
3,2020-01-25,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0,Outside China
4,2020-01-26,Afghanistan,,33.0,65.0,0.0,0.0,0.0,0.0,Outside China
...,...,...,...,...,...,...,...,...,...,...
34705,2020-05-26,Zimbabwe,,-20.0,30.0,56.0,25.0,4.0,27.0,Outside China
34706,2020-05-27,Zimbabwe,,-20.0,30.0,132.0,25.0,4.0,103.0,Outside China
34707,2020-05-28,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0,Outside China
34708,2020-05-29,Zimbabwe,,-20.0,30.0,149.0,28.0,4.0,117.0,Outside China


In [60]:
temp = temp.groupby(['Region', 'Date'])['Confirmed', 'Deaths', 'Recovered']
temp = temp.sum().diff().reset_index()
temp

Unnamed: 0,Region,Date,Confirmed,Deaths,Recovered
0,From China,2020-01-22,,,
1,From China,2020-01-23,95.0,1.0,2.0
2,From China,2020-01-24,277.0,8.0,6.0
3,From China,2020-01-25,486.0,16.0,3.0
4,From China,2020-01-26,669.0,14.0,10.0
...,...,...,...,...,...
255,Outside China,2020-05-26,94564.0,4221.0,55212.0
256,Outside China,2020-05-27,102161.0,5176.0,63123.0
257,Outside China,2020-05-28,117156.0,4680.0,65868.0
258,Outside China,2020-05-29,115312.0,4559.0,77564.0


In [61]:
mask = temp['Region'] != temp['Region'].shift(1)
temp.loc[mask, 'Confirmed'] = np.nan
temp.loc[mask, 'Deaths'] = np.nan
temp.loc[mask, 'Recovered'] = np.nan
temp

Unnamed: 0,Region,Date,Confirmed,Deaths,Recovered
0,From China,2020-01-22,,,
1,From China,2020-01-23,95.0,1.0,2.0
2,From China,2020-01-24,277.0,8.0,6.0
3,From China,2020-01-25,486.0,16.0,3.0
4,From China,2020-01-26,669.0,14.0,10.0
...,...,...,...,...,...
255,Outside China,2020-05-26,94564.0,4221.0,55212.0
256,Outside China,2020-05-27,102161.0,5176.0,63123.0
257,Outside China,2020-05-28,117156.0,4680.0,65868.0
258,Outside China,2020-05-29,115312.0,4559.0,77564.0


In [62]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.lineplot(x="Date", y="Confirmed", hue="Region", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

In [63]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.lineplot(x="Date", y="Deaths", hue="Region", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

In [64]:
fig = plt.figure()
sns.set_color_codes("pastel")
ax = sns.lineplot(x="Date", y="Recovered", hue="Region", data=temp)
#ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.xaxis.set_major_locator(plt.MaxNLocator(10))

<IPython.core.display.Javascript object>

# Top 50 Countries By Confirmed Cases

In [65]:
temp_f = full_latest_grouped.sort_values(by='Confirmed', ascending=False).head(50)
temp_f = temp_f.reset_index(drop=True)
temp_f.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,Mortality Rate
0,US,1770165.0,103776.0,416461.0,1249928.0,5.86
1,Brazil,498440.0,28834.0,200892.0,268714.0,5.78
2,Russia,396575.0,4555.0,167469.0,224551.0,1.15
3,United Kingdom,274219.0,38458.0,1187.0,234574.0,14.02
4,Spain,239228.0,27125.0,150376.0,61727.0,11.34
5,Italy,232664.0,33340.0,155633.0,43691.0,14.33
6,France,188752.0,28774.0,68386.0,91592.0,15.24
7,Germany,183189.0,8530.0,164908.0,9751.0,4.66
8,India,181827.0,5185.0,86936.0,89706.0,2.85
9,Turkey,163103.0,4515.0,126984.0,31604.0,2.77


# Top 25 Countries By Deaths Reported

In [66]:
temp_flg = temp_f[temp_f['Deaths']>0][['Country/Region', 'Deaths']].head(25)
temp_flg.sort_values('Deaths', ascending=False).reset_index(drop=True).style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Deaths
0,US,103776.0
1,United Kingdom,38458.0
2,Italy,33340.0
3,Brazil,28834.0
4,France,28774.0
5,Spain,27125.0
6,Mexico,9779.0
7,Belgium,9453.0
8,Germany,8530.0
9,Iran,7734.0
