In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,date
import matplotlib.pyplot as plt
import folium
import plotly.express as px
from matplotlib import ticker 
from scipy.interpolate import make_interp_spline, BSpline
import pycountry_convert as pc
#import geopandas as gpd

## Interactive web-based dashboard https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6

## COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University
### https://github.com/CSSEGISandData/COVID-19/

### Get Data

In [6]:
# Retriving Dataset from 
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data
# which is updated daily
 
### Prep data for US
url_confirmed_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
df_confirmed_us = pd.read_csv( url_confirmed_us )

url_deaths_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
df_deaths_us = pd.read_csv( url_deaths_us )

In [7]:
df_confirmed_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,11/20/20,11/21/20,11/22/20,11/23/20,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,2580,2597,2617,2634,2661,2686,2704,2716,2735,2751
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,8038,8131,8199,8269,8376,8473,8576,8603,8733,8820
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,1151,1157,1160,1161,1167,1170,1170,1171,1173,1175
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,1024,1036,1136,1142,1157,1162,1170,1173,1179,1188
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,2704,2735,2754,2763,2822,2855,2879,2888,2922,2946


In [9]:
df_deaths_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,11/20/20,11/21/20,11/22/20,11/23/20,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,39,39,39,39,39,41,42,42,42,42
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,84,84,84,84,84,98,98,98,98,98
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,10,10,10,10,10,10,10,10,10,11
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,18,17,17,17,17,17,17,17,17,17
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,35,36,36,36,36,39,40,40,40,40


In [10]:
# Check the size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)
#print(df_recovered.shape)

(3340, 324)
(3340, 325)


In [11]:
df_confirmed_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20'],
      dtype='object')

In [12]:
# Transform from wide to long format table
df_confirmed_us = pd.melt(df_confirmed_us, id_vars=df_confirmed_us.columns[:11], 
                    value_vars = df_confirmed_us.columns[11:], 
                    var_name = 'date', 
                    value_name = 'confirmed')
df_confirmed_us.head()



Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0


In [13]:
df_deaths_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Population',
       '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20'],
      dtype='object')

In [14]:
# Transform from wide to long format table
df_deaths_us = pd.melt(df_deaths_us, id_vars=df_deaths_us.columns[:12], 
                    value_vars = df_deaths_us.columns[12:], 
                    var_name = 'date', 
                    value_name = 'deaths')

In [15]:
df_deaths_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population,date,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",55869,1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",223234,1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",24686,1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",22394,1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",57826,1/22/20,0


In [16]:
# Check the new size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)

(1045420, 13)
(1045420, 14)


In [17]:
# Merge the three table together
df_data_us = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','UID','iso2','iso3','FIPS','Admin2','Lat','Long_','Combined_Key','code3'])

In [18]:
df_data_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed,Population,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,55869,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,223234,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,24686,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0,22394,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,57826,0


In [19]:
print(df_data_us.shape)

(949642, 15)


In [20]:
df_confirmed_us = df_confirmed_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [21]:
df_deaths_us = df_deaths_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [22]:
# Merge the two tables together
df_data_us2 = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','FIPS','Admin2'])

In [23]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Lat_x,Long__x,Combined_Key_x,date,confirmed,Lat_y,Long__y,Combined_Key_y,Population,deaths
0,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,32.539527,-86.644082,"Autauga, Alabama, US",55869,0
1,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,30.72775,-87.722071,"Baldwin, Alabama, US",223234,0
2,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,31.868263,-85.387129,"Barbour, Alabama, US",24686,0
3,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0,32.996421,-87.125115,"Bibb, Alabama, US",22394,0
4,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,33.982109,-86.567906,"Blount, Alabama, US",57826,0


In [24]:
# Drop the columns 
df_data_us2 = df_data_us2.drop(['Lat_x','Lat_y','Long__x','Long__y','Combined_Key_x','Combined_Key_y','Country_Region'],axis=1)
#df_data_us2 = df_data_us2.drop(['Combined_Key_x','Combined_Key_y'],axis=1)
#df_data_us2 = df_data_us2.drop(['Country_Region'],axis=1)

In [25]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0


In [26]:
df_data_us2.to_csv('covid19_confirmed_deaths_us.csv',index=False)

In [27]:
test = pd.read_csv('covid19_confirmed_deaths_us.csv' )

In [28]:
test.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0
