In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,date
import matplotlib.pyplot as plt
import folium
import plotly.express as px
from matplotlib import ticker 
from scipy.interpolate import make_interp_spline, BSpline
import pycountry_convert as pc
#import geopandas as gpd

## Interactive web-based dashboard https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6

## COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University
### https://github.com/CSSEGISandData/COVID-19/

### Get Data

In [2]:
# Retriving Dataset from 
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data
# which is updated daily
 
### Prep data for US
url_confirmed_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
df_confirmed_us = pd.read_csv( url_confirmed_us )

url_deaths_us = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
df_deaths_us = pd.read_csv( url_deaths_us )

In [3]:
df_confirmed_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,1/6/21,1/7/21,1/8/21,1/9/21,1/10/21,1/11/21,1/12/21,1/13/21,1/14/21,1/15/21
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,4645,4705,4770,4847,4879,4902,4970,4998,5075,5103
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,14656,14845,15052,15202,15327,15417,15572,15701,15841,16002
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,1597,1614,1634,1648,1658,1663,1679,1685,1696,1712
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,1944,1981,2015,2038,2051,2060,2090,2109,2113,2130
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,4898,4957,5018,5047,5066,5080,5134,5170,5219,5264


In [4]:
df_deaths_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,1/6/21,1/7/21,1/8/21,1/9/21,1/10/21,1/11/21,1/12/21,1/13/21,1/14/21,1/15/21
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,50,50,50,53,54,55,55,55,55,55
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,171,171,171,173,173,173,175,175,177,179
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,35,35,35,35,35,35,35,35,36,36
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,48,48,48,48,48,48,48,48,47,47
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,67,72,75,77,77,77,79,80,80,83


In [5]:
# Check the size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)
#print(df_recovered.shape)

(3340, 371)
(3340, 372)


In [6]:
df_confirmed_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20'],
      dtype='object')

In [7]:
# Transform from wide to long format table
df_confirmed_us = pd.melt(df_confirmed_us, id_vars=df_confirmed_us.columns[:11], 
                    value_vars = df_confirmed_us.columns[11:], 
                    var_name = 'date', 
                    value_name = 'confirmed')
df_confirmed_us.head()



Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0


In [8]:
df_deaths_us.columns[0:20]

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Population',
       '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20'],
      dtype='object')

In [9]:
# Transform from wide to long format table
df_deaths_us = pd.melt(df_deaths_us, id_vars=df_deaths_us.columns[:12], 
                    value_vars = df_deaths_us.columns[12:], 
                    var_name = 'date', 
                    value_name = 'deaths')

In [10]:
df_deaths_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population,date,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",55869,1/22/20,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",223234,1/22/20,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",24686,1/22/20,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",22394,1/22/20,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",57826,1/22/20,0


In [11]:
# Check the new size of the each of the three dataframe
print(df_confirmed_us.shape)
print(df_deaths_us.shape)

(1202400, 13)
(1202400, 14)


In [12]:
# Merge the three table together
df_data_us = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','UID','iso2','iso3','FIPS','Admin2','Lat','Long_','Combined_Key','code3'])

In [13]:
df_data_us.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,confirmed,Population,deaths
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,55869,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,223234,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,24686,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0,22394,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,57826,0


In [14]:
print(df_data_us.shape)

(1092240, 15)


In [15]:
df_confirmed_us = df_confirmed_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [16]:
df_deaths_us = df_deaths_us.drop(['UID','iso2','iso3','code3'],axis=1)

In [17]:
# Merge the two tables together
df_data_us2 = df_confirmed_us.merge(df_deaths_us, how='inner', on=['Country_Region','Province_State','date','FIPS','Admin2'])

In [18]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Lat_x,Long__x,Combined_Key_x,date,confirmed,Lat_y,Long__y,Combined_Key_y,Population,deaths
0,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",1/22/20,0,32.539527,-86.644082,"Autauga, Alabama, US",55869,0
1,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",1/22/20,0,30.72775,-87.722071,"Baldwin, Alabama, US",223234,0
2,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",1/22/20,0,31.868263,-85.387129,"Barbour, Alabama, US",24686,0
3,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",1/22/20,0,32.996421,-87.125115,"Bibb, Alabama, US",22394,0
4,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",1/22/20,0,33.982109,-86.567906,"Blount, Alabama, US",57826,0


In [19]:
# Drop the columns 
df_data_us2 = df_data_us2.drop(['Lat_x','Lat_y','Long__x','Long__y','Combined_Key_x','Combined_Key_y','Country_Region'],axis=1)
#df_data_us2 = df_data_us2.drop(['Combined_Key_x','Combined_Key_y'],axis=1)
#df_data_us2 = df_data_us2.drop(['Country_Region'],axis=1)

In [20]:
df_data_us2.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0


In [21]:
df_data_us2.to_csv('covid19_confirmed_deaths_us.csv',index=False)

In [22]:
test = pd.read_csv('covid19_confirmed_deaths_us.csv' )

In [23]:
test.head()

Unnamed: 0,FIPS,Admin2,Province_State,date,confirmed,Population,deaths
0,1001.0,Autauga,Alabama,1/22/20,0,55869,0
1,1003.0,Baldwin,Alabama,1/22/20,0,223234,0
2,1005.0,Barbour,Alabama,1/22/20,0,24686,0
3,1007.0,Bibb,Alabama,1/22/20,0,22394,0
4,1009.0,Blount,Alabama,1/22/20,0,57826,0
