In [1]:
# Import Important Module
import numpy as np 
import pandas as pd
from datetime import date

In [2]:
# Read the raw case records, parsing "Date" as datetimes, and discard the
# serial-number column "Sno".
data = pd.read_csv("2019_nCoV_data.csv", parse_dates=["Date"]).drop(columns="Sno")

# Relabel "Mainland China" as "China". The replacement is applied to every
# column of the frame, not only "Country".
data = data.replace("Mainland China", "China")
data.head()

Unnamed: 0,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,2020-01-22 12:00:00,Anhui,China,01/22/2020 12:00:00,1.0,0.0,0.0
1,2020-01-22 12:00:00,Beijing,China,01/22/2020 12:00:00,14.0,0.0,0.0
2,2020-01-22 12:00:00,Chongqing,China,01/22/2020 12:00:00,6.0,0.0,0.0
3,2020-01-22 12:00:00,Fujian,China,01/22/2020 12:00:00,1.0,0.0,0.0
4,2020-01-22 12:00:00,Gansu,China,01/22/2020 12:00:00,0.0,0.0,0.0


In [3]:
# Distinct values of the "Country" column, in order of first appearance,
# and how many of them there are.
countryList = data['Country'].drop_duplicates().tolist()
print("\nTotal countries affected by virus: ", len(countryList))


Total countries affected by virus:  33


In [4]:
# Determine the most recent calendar day present in the data.
# max() does not depend on the rows being sorted by date (the previous
# approach read the physically last row), and normalize() truncates the
# timestamp to midnight without a string round-trip.
latestDate = data['Date'].max().normalize()

# Year / month / day components of that day, kept as plain integers.
year, month, day = latestDate.year, latestDate.month, latestDate.day

# Keep every record stamped on the latest day. ">=" (rather than ">") makes
# sure a snapshot recorded exactly at midnight is not silently dropped.
# NOTE(review): if a day ever contains more than one snapshot, downstream
# sums over data_latest will add them together -- confirm one snapshot per day.
data_latest = data[data['Date'] >= latestDate]
data_latest.head()

Unnamed: 0,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
1494,2020-02-15 22:00:00,Hubei,China,2020-02-15T23:13:05,56249.0,1596.0,5623.0
1495,2020-02-15 22:00:00,Guangdong,China,2020-02-15T14:03:04,1294.0,2.0,410.0
1496,2020-02-15 22:00:00,Henan,China,2020-02-15T12:13:08,1212.0,13.0,391.0
1497,2020-02-15 22:00:00,Zhejiang,China,2020-02-15T11:23:17,1162.0,0.0,428.0
1498,2020-02-15 22:00:00,Hunan,China,2020-02-15T13:03:04,1001.0,2.0,425.0


In [5]:
# Per-country totals of confirmed, death and recovered counts in data_latest,
# displayed from the most to the fewest confirmed cases.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names was deprecated in pandas 1.0 and raises an error in pandas 2.x.
cases = data_latest.groupby('Country')[['Confirmed', 'Deaths', 'Recovered']].sum()
cases.sort_values(by=['Confirmed'], ascending=False)

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,68347.0,1662.0,9294.0
Others,285.0,0.0,0.0
Singapore,72.0,0.0,18.0
Hong Kong,56.0,1.0,1.0
Japan,43.0,1.0,12.0
Thailand,33.0,0.0,12.0
South Korea,28.0,0.0,9.0
Malaysia,22.0,0.0,7.0
Taiwan,18.0,0.0,2.0
Vietnam,16.0,0.0,7.0


In [6]:
# Overall totals across all rows of data_latest, one printed line per
# outcome column (confirmed, deaths, recovered).
for label, column in (
    ('Globally Confirmed Cases: ', 'Confirmed'),
    ('Global Deaths: ', 'Deaths'),
    ('Globally Recovered Cases: ', 'Recovered'),
):
    print(label, data_latest[column].sum())

Globally Confirmed Cases:  69032.0
Global Deaths:  1666.0
Globally Recovered Cases:  9395.0


In [7]:
# Totals per (Country, Province/State) pair in data_latest.
# Only the three numeric count columns are summed: aggregating the whole
# frame would also try to sum the datetime "Date" and string "Last Update"
# columns, which fails on pandas 2.x where numeric_only defaults to False.
# Rows whose Province/State is missing are excluded by groupby's default
# handling of NaN keys.
data_latest.groupby(['Country', 'Province/State'])[['Confirmed', 'Deaths', 'Recovered']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered
Country,Province/State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia,New South Wales,4.0,0.0,4.0
Australia,Queensland,5.0,0.0,0.0
Australia,South Australia,2.0,0.0,0.0
Australia,Victoria,4.0,0.0,4.0
Canada,British Columbia,4.0,0.0,0.0
Canada,"London, ON",1.0,0.0,1.0
Canada,"Toronto, ON",2.0,0.0,0.0
China,Anhui,950.0,6.0,221.0
China,Beijing,375.0,4.0,98.0
China,Chongqing,544.0,5.0,184.0


In [8]:
# Deaths per country in data_latest, highest first; the displayed table
# keeps only countries with at least one death.
deathFilter = (
    data_latest.groupby('Country')['Deaths']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)
deathFilter.loc[deathFilter['Deaths'] > 0]

Unnamed: 0_level_0,Deaths
Country,Unnamed: 1_level_1
China,1662.0
France,1.0
Hong Kong,1.0
Philippines,1.0
Japan,1.0


In [9]:
# Recoveries per country in data_latest, highest first; the displayed table
# keeps only countries with at least one recovery.
recoveryFilter = (
    data_latest.groupby('Country')['Recovered']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)
recoveryFilter.loc[recoveryFilter['Recovered'] > 0]

Unnamed: 0_level_0,Recovered
Country,Unnamed: 1_level_1
China,9294.0
Singapore,18.0
Japan,12.0
Thailand,12.0
South Korea,9.0
Australia,8.0
Vietnam,7.0
Malaysia,7.0
France,4.0
US,3.0


In [10]:
# Confirmed cases per country in data_latest, highest first; the displayed
# table keeps only countries with at least one confirmed case.
confirmFilter = (
    data_latest.groupby('Country')['Confirmed']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)
confirmFilter.loc[confirmFilter['Confirmed'] > 0]

Unnamed: 0_level_0,Confirmed
Country,Unnamed: 1_level_1
China,68347.0
Others,285.0
Singapore,72.0
Hong Kong,56.0
Japan,43.0
Thailand,33.0
South Korea,28.0
Malaysia,22.0
Taiwan,18.0
Germany,16.0
