In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,date
import matplotlib.pyplot as plt
import folium
import plotly.express as px
from matplotlib import ticker 
from scipy.interpolate import make_interp_spline, BSpline
import pycountry_convert as pc
#import geopandas as gpd

## Interactive web-based dashboard https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6

## COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University
### https://github.com/CSSEGISandData/COVID-19/

### Get Data

In [89]:
# Retriving Dataset from 
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data
# which is updated daily
 
### Prep data for US
url_confirmed_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
df_confirmed_us = pd.read_csv( url_confirmed_us )

url_deaths_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
df_deaths_us = pd.read_csv( url_deaths_us )

In [90]:
df_confirmed_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,10/4/20,10/5/20,10/6/20,10/7/20,10/8/20,10/9/20,10/10/20,10/11/20,10/12/20,10/13/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,1828,1831,1839,1852,1863,1882,1898,1905,1911,1924
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,6073,6085,6116,6134,6141,6172,6190,6203,6220,6248
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,921,921,923,927,927,939,942,942,944,951
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,686,687,691,703,708,719,726,736,738,744
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,1656,1662,1665,1673,1681,1689,1704,1713,1722,1742


In [91]:
df_death_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,10/4/20,10/5/20,10/6/20,10/7/20,10/8/20,10/9/20,10/10/20,10/11/20,10/12/20,10/13/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,27,27,27,27,28,28,28,28,28,28
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,53,53,55,56,64,64,65,65,65,65
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,7,7,7,7,9,9,9,9,9,9
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,10,10,10,10,12,12,12,12,12,12
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,15,15,15,16,16,16,16,16,16,16


In [92]:
# Check the size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)
#print(df_recovered.shape)

(3340, 277)
(3340, 278)


In [93]:
df_confirmed_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20'],
      dtype='object')

In [94]:
# Transform from wide to long format table
df_confirmed_us = pd.melt(df_confirmed_us, id_vars=df_confirmed_us.columns[:11], 
                    value_vars = df_confirmed_us.columns[11:], 
                    var_name = 'date', 
                    value_name = 'confirmed')
df_confirmed_us.head()



Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0


In [95]:
df_deaths_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Population',
       '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20'],
      dtype='object')

In [96]:
# Transform from wide to long format table
df_deaths_us = pd.melt(df_deaths_us, id_vars=df_deaths_us.columns[:12], 
                    value_vars = df_deaths_us.columns[12:], 
                    var_name = 'date', 
                    value_name = 'deaths')

In [97]:
df_deaths_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population,date,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",55869,1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",223234,1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",24686,1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",22394,1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",57826,1/22/20,0


In [98]:
# Check the new size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)

(888440, 13)
(888440, 14)


In [99]:
# Merge the three table together
df_data_us = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','UID','iso2','iso3','FIPS','Admin2','Lat','Long_','Combined_Key','code3'])

In [100]:
df_data_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed,Population,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,55869,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,223234,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,24686,0
3,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,57826,0
4,84001011,US,USA,840,1011.0,Bullock,Alabama,US,32.100305,-85.712655,"Bullock, Alabama, US",1/22/20,0,10101,0


In [101]:
print(df_data_us.shape)

(805714, 15)


In [102]:
df_confirmed_us = df_confirmed_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [103]:
df_deaths_us = df_deaths_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [114]:
# Merge the two tables together
df_data_us2 = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','FIPS','Admin2'])

In [115]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Lat_x,Long__x,Combined_Key_x,date,confirmed,Lat_y,Long__y,Combined_Key_y,Population,deaths
0,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,32.539527,-86.644082,"Autauga, Alabama, US",55869,0
1,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,30.72775,-87.722071,"Baldwin, Alabama, US",223234,0
2,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,31.868263,-85.387129,"Barbour, Alabama, US",24686,0
3,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0,32.996421,-87.125115,"Bibb, Alabama, US",22394,0
4,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,33.982109,-86.567906,"Blount, Alabama, US",57826,0


In [116]:
# Drop the columns 
df_data_us2 = df_data_us2.drop(['Lat_x','Lat_y','Long__x','Long__y','Combined_Key_x','Combined_Key_y','Country_Region'],axis=1)
#df_data_us2 = df_data_us2.drop(['Combined_Key_x','Combined_Key_y'],axis=1)
#df_data_us2 = df_data_us2.drop(['Country_Region'],axis=1)

In [117]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0


In [118]:
df_data_us2.to_csv('covid19_confirmed_deaths_us.csv',index=False)

In [119]:
test = pd.read_csv('covid19_confirmed_deaths_us.csv' )

In [121]:
test.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0
