In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = 'plotly_dark'
from plotly.subplots import make_subplots


In [2]:
# Load both competition splits; the 'Date' column is parsed into datetimes on read.
train, test = (
    pd.read_csv(csv_name, parse_dates=['Date'])
    for csv_name in ('train.csv', 'test.csv')
)

In [3]:
# Load covid_19_clean_complete.csv; the 'Date' column is parsed into datetimes on read.
cleaned_data = pd.read_csv('covid_19_clean_complete.csv', parse_dates=['Date'])

In [4]:
# Preview the first ten rows of the training frame.
train.head(10)

Unnamed: 0,Id,Province/State,Country/Region,Lat,Long,Date,ConfirmedCases,Fatalities
0,1,,Afghanistan,33.0,65.0,2020-01-22,0.0,0.0
1,2,,Afghanistan,33.0,65.0,2020-01-23,0.0,0.0
2,3,,Afghanistan,33.0,65.0,2020-01-24,0.0,0.0
3,4,,Afghanistan,33.0,65.0,2020-01-25,0.0,0.0
4,5,,Afghanistan,33.0,65.0,2020-01-26,0.0,0.0
5,6,,Afghanistan,33.0,65.0,2020-01-27,0.0,0.0
6,7,,Afghanistan,33.0,65.0,2020-01-28,0.0,0.0
7,8,,Afghanistan,33.0,65.0,2020-01-29,0.0,0.0
8,9,,Afghanistan,33.0,65.0,2020-01-30,0.0,0.0
9,10,,Afghanistan,33.0,65.0,2020-01-31,0.0,0.0


In [6]:
# (rows, columns) of the train and test frames, shown as a tuple of tuples.
train.shape, test.shape

((17324, 8), (12212, 6))

In [7]:
# Give the training columns short lower-case names.
# The result is reassigned rather than using inplace=True: the outcome is the
# same, but the cell stays chainable and avoids the discouraged in-place API.
train = train.rename(columns={
    'Id': 'id',
    'Province/State': 'province',
    'Country/Region': 'country',
    'Lat': 'lat',
    'Long': 'long',
    'Date': 'date',
    'ConfirmedCases': 'confirmed',
    'Fatalities': 'death',
})

In [8]:
# Check the renamed columns on the first five rows.
train.head()

Unnamed: 0,id,province,country,lat,long,date,confirmed,death
0,1,,Afghanistan,33.0,65.0,2020-01-22,0.0,0.0
1,2,,Afghanistan,33.0,65.0,2020-01-23,0.0,0.0
2,3,,Afghanistan,33.0,65.0,2020-01-24,0.0,0.0
3,4,,Afghanistan,33.0,65.0,2020-01-25,0.0,0.0
4,5,,Afghanistan,33.0,65.0,2020-01-26,0.0,0.0


In [9]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
train.describe()

Unnamed: 0,id,lat,long,confirmed,death
count,17324.0,17324.0,17324.0,17324.0,17324.0
mean,13190.5,26.287693,4.766191,293.19118,10.421208
std,7624.680783,22.935113,79.923334,3382.665468,155.612328
min,1.0,-41.4545,-157.4983,0.0,0.0
25%,6595.75,13.145425,-71.516375,0.0,0.0
50%,13190.5,32.98555,9.775,0.0,0.0
75%,19785.25,42.501575,64.688975,8.0,0.0
max,26380.0,71.7069,174.886,67800.0,5476.0


In [10]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line after the report.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17324 entries, 0 to 17323
Data columns (total 8 columns):
id           17324 non-null int64
province     7930 non-null object
country      17324 non-null object
lat          17324 non-null float64
long         17324 non-null float64
date         17324 non-null datetime64[ns]
confirmed    17324 non-null float64
death        17324 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(1), object(2)
memory usage: 1.1+ MB
None


In [11]:
# Preview the first 20 rows of the cleaned case data.
cleaned_data.head(20)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Japan,36.0,138.0,2020-01-22,2,0,0
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0
5,British Columbia,Canada,49.2827,-123.1207,2020-01-22,0,0,0
6,New South Wales,Australia,-33.8688,151.2093,2020-01-22,0,0,0
7,Victoria,Australia,-37.8136,144.9631,2020-01-22,0,0,0
8,Queensland,Australia,-28.0167,153.4,2020-01-22,0,0,0
9,,Cambodia,11.55,104.9167,2020-01-22,0,0,0


In [12]:
# Give the cleaned-data columns the same short lower-case names used for `train`.
# The result is reassigned rather than using inplace=True. The former 'Id'
# mapping is dropped because this frame has no 'Id' column, so it never applied.
cleaned_data = cleaned_data.rename(columns={
    'Province/State': 'province',
    'Country/Region': 'country',
    'Lat': 'lat',
    'Long': 'long',
    'Date': 'date',
    'Confirmed': 'confirmed',
    'Deaths': 'death',
    'Recovered': 'recovered',
})

In [13]:
# Check the renamed columns on the first five rows.
cleaned_data.head()

Unnamed: 0,province,country,lat,long,date,confirmed,death,recovered
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Japan,36.0,138.0,2020-01-22,2,0,0
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0


In [43]:
# Relabel 'Mainland China' as 'China'; every other country value is left unchanged.
cleaned_data['country'] = cleaned_data['country'].replace('Mainland China', 'China')

In [44]:
# Replace missing provinces with a single space (' '), not an empty string,
# so the column no longer contains NaN.
cleaned_data['province'] = cleaned_data['province'].fillna(' ')

In [45]:
# Show the country column as a raw NumPy array.
cleaned_data['country'].values

array(['Thailand', 'Japan', 'Singapore', ..., 'Mozambique', 'Syria',
       'Timor-Leste'], dtype=object)

In [46]:
# NOTE: this binds a second name to the same DataFrame object; it is not a copy.
# Any later column assignment through `data` also changes `cleaned_data`.
data = cleaned_data

In [67]:
# Number of rows per country, most frequent first.
data.country.value_counts()

US             3416
China          2013
Canada          732
France          549
Australia       549
               ... 
Costa Rica       61
Haiti            61
Chile            61
Switzerland      61
Turkey           61
Name: country, Length: 171, dtype: int64

In [50]:
# Worldwide daily totals: one row per date with the summed confirmed cases and deaths.
# The columns are selected with a list, because indexing a groupby with a bare
# tuple of keys is deprecated and rejected by newer pandas. The grouping key
# 'date' is left out of the selection; reset_index() restores it as a column.
grouped = data.groupby('date')[['confirmed', 'death']].sum().reset_index()

In [51]:
# Preview the first five daily totals.
grouped.head()

Unnamed: 0,date,confirmed,death
0,2020-01-22,554,17
1,2020-01-23,652,18
2,2020-01-24,939,26
3,2020-01-25,1432,42
4,2020-01-26,2113,56


In [52]:
# Worldwide confirmed cases per date as a single line chart.
fig = px.line(
    grouped,
    x='date',
    y='confirmed',
    title='Covid19 confirmed count based on time',
)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Confirmed Cases')
fig.show()


In [53]:
def _daily_totals(frame):
    """Return one row per date with the summed 'confirmed' and 'death' columns of ``frame``.

    The columns are selected with a list because indexing a groupby with a bare
    tuple of keys is deprecated and rejected by newer pandas. The grouping key
    'date' is not selected; ``reset_index()`` restores it as a regular column.
    """
    return frame.groupby('date')[['confirmed', 'death']].sum().reset_index()


# Per-country slices. reset_index() keeps the original row labels in an 'index'
# column, exactly as before, and the grouped_* frames hold the daily totals.
china_data = data[data['country'] == "China"].reset_index()
grouped_china = _daily_totals(china_data)

italy_data = data[data['country'] == "Italy"].reset_index()
grouped_italy = _daily_totals(italy_data)

usa_data = data[data['country'] == "US"].reset_index()
grouped_usa = _daily_totals(usa_data)

korea_data = data[data['country'] == "South Korea"].reset_index()
grouped_korea = _daily_totals(korea_data)

# Everything that is not one of the four countries singled out above.
rest_data = data[~data['country'].isin(['China', 'Italy', 'US', 'South Korea'])].reset_index()
grouped_rest = _daily_totals(rest_data)

In [54]:
# Inspect which countries ended up in the "rest of the world" slice.
rest_data['country']

0           Thailand
1              Japan
2          Singapore
3              Nepal
4           Malaysia
            ...     
12500       Dominica
12501        Grenada
12502     Mozambique
12503          Syria
12504    Timor-Leste
Name: country, Length: 12505, dtype: object

In [55]:
# One confirmed-cases line chart per region, in the same order as plot_titles.
# The five copy-pasted px.line calls are folded into a loop. height=500 used to
# be set on the first two figures only; it is now applied to all of them so the
# charts are directly comparable.
plot_titles = ['China', 'Italy', 'US', 'South Korea', 'Rest of the world']
region_frames = [grouped_china, grouped_italy, grouped_usa, grouped_korea, grouped_rest]

for region_frame, region_title in zip(region_frames, plot_titles):
    fig = px.line(
        region_frame,
        x='date',
        y='confirmed',
        title=f"Confirmed cases in {region_title.upper()}",
        height=500,
    )
    fig.show()

In [68]:
# Fill missing provinces with a single space, then build a copy of the data
# that has every column except 'province'.
data['province'] = data['province'].fillna(' ')
not_state = data.drop(columns=['province'])

In [70]:
# Number of rows per country in the province-free frame, most frequent first.
not_state.country.value_counts()

US             3416
China          2013
Canada          732
France          549
Australia       549
               ... 
Costa Rica       61
Haiti            61
Chile            61
Switzerland      61
Turkey           61
Name: country, Length: 171, dtype: int64

In [71]:
# Snapshot of the most recent date in the data, totalled per country.
# Series.max() replaces the builtin max(), which iterates the column element by
# element in Python. The groupby columns are selected with a list because a
# bare tuple of keys is deprecated and rejected by newer pandas.
latest_date = not_state['date'].max()
latest = not_state[not_state['date'] == latest_date].reset_index()
grouped_latest = latest.groupby('country')[['confirmed', 'death']].sum().reset_index()

In [81]:
# Latest totals for China only. reset_index() without drop=True keeps the
# row's previous label in an extra 'index' column.
chinal = grouped_latest[grouped_latest['country']=='China'].reset_index()

In [82]:
# Display the single-row China snapshot.
chinal

Unnamed: 0,index,country,confirmed,death
0,33,China,81397,3265


In [85]:
# World map coloured by the latest confirmed count per country.
# Countries are matched by name and the colour range is fixed to [1, 8000].
world_map_options = dict(
    locations='country',
    locationmode='country names',
    color='confirmed',
    hover_name='country',
    range_color=[1, 8000],
    color_continuous_scale='peach',
    title='Countries with confirmed cases',
)
fig = px.choropleth(grouped_latest, **world_map_options)

fig.show()

In [91]:
# Same per-country confirmed-count map, restricted to the 'europe' scope.
# The colour range is fixed to [1, 8000].
europe_map_options = dict(
    locations='country',
    locationmode='country names',
    color='confirmed',
    hover_name='country',
    range_color=[1, 8000],
    color_continuous_scale='portland',
    scope='europe',
    title='European countries with confirmed cases',
    height=800,
)
fig = px.choropleth(grouped_latest, **europe_map_options)
fig.show()

In [109]:
# The 14 countries with the most confirmed cases. The rows are reversed after
# the descending sort, so the largest value is the last row passed to the chart.
top_countries = (
    grouped_latest
    .sort_values('confirmed', ascending=False)
    .head(14)
    .iloc[::-1]
)
fig = px.bar(
    top_countries,
    x='confirmed',
    y='country',
    orientation='h',
    text='confirmed',
    title='Confirmed cases',
    height=800,
)
fig.show()

In [119]:
# All US rows, then only those on the most recent US date.
# Series.max() replaces the builtin max(), which iterates the column element by
# element in Python instead of using the vectorised reduction.
usa_total = data[data['country'] == 'US']
usa_latest = usa_total[usa_total['date'] == usa_total['date'].max()]

In [125]:
# Collapse the latest US rows to one row per province with summed counts.
# The columns are selected with a list because indexing a groupby with a bare
# tuple of keys is deprecated and rejected by newer pandas.
usa_latest = usa_latest.groupby('province')[['confirmed', 'death']].sum().reset_index()

In [133]:
# The 20 US provinces with the most confirmed cases. The rows are reversed after
# the descending sort, so the largest value is the last row passed to the chart.
top_us_provinces = (
    usa_latest
    .sort_values('confirmed', ascending=False)
    .head(20)
    .iloc[::-1]
)
fig = px.bar(
    top_us_provinces,
    x='confirmed',
    y='province',
    text='confirmed',
    title='USA confirmed cases by state',
    orientation='h',
    height=900,
    color_discrete_sequence=['red'],
)
fig.show()

In [134]:
# Worldwide deaths per date, drawn from the daily totals in `grouped`.
fig = px.line(grouped, x='date', y='death', title='Death over time')
fig.show()

In [138]:
# Deaths over time, one line per region on a shared figure.
# The grouped_* frames are the per-date totals built in an earlier cell.
death_series = [
    ('China', grouped_china),
    ('Italy', grouped_italy),
    ('USA', grouped_usa),
    ('Korea', grouped_korea),
    ('Rest of World', grouped_rest),
]

fig = go.Figure()

for trace_name, region_frame in death_series:
    trace = go.Scatter(
        x=region_frame['date'],
        y=region_frame['death'],
        mode='lines',
        name=trace_name,
    )
    fig.add_trace(trace)

fig.update_layout(
    title='Confirmed death cases',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Death'),
)
fig.show()