In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
import geopandas as gpd
from keplergl import KeplerGl
import pprint

In [2]:
# Folder holding the time-series CSVs, given relative to the notebook's working directory.
path_parts = ["..", "COVID-19", "csse_covid_19_data", "csse_covid_19_time_series"]
data_dir = os.path.join(*path_parts)
# Show what is available in that folder.
os.listdir(data_dir)

['time_series_covid19_confirmed_US.csv',
 'time_series_covid19_recovered_global.csv',
 'time_series_covid19_confirmed_global.csv',
 'time_series_covid19_deaths_US.csv',
 '.gitignore',
 'time_series_covid19_deaths_global.csv',
 'README.md']

In [3]:

# Load the three global time series in a fixed order: confirmed, recovered, deaths.
csv_names = (
    "time_series_covid19_confirmed_global.csv",  # -> df
    "time_series_covid19_recovered_global.csv",  # -> df_x1
    "time_series_covid19_deaths_global.csv",     # -> df_x2
)
df, df_x1, df_x2 = (pd.read_csv(os.path.join(data_dir, name)) for name in csv_names)

In [4]:
# Preview the confirmed-cases table (read from time_series_covid19_confirmed_global.csv).
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,110,110,120,170,174,237,273,281,299,349
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,186,197,212,223,243,259,277,304,333,361
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,409,454,511,584,716,847,986,1171,1251,1320
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,267,308,334,370,376,390,428,439,466,501
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,4,5,7,7,7,8,8,8,10,14


In [5]:
# Preview the recovered table (read from time_series_covid19_recovered_global.csv).
df_x1.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,2,2,2,2,5,5,10,10,10,15
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,31,31,33,44,52,67,76,89,99,104
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,29,31,31,37,46,61,61,62,90,90
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,1,1,1,10,10,10,10,16,21,26
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,2,2


In [6]:
# Preview the deaths table (read from time_series_covid19_deaths_global.csv).
df_x2.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,4,4,4,4,4,4,6,6,7,7
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,8,10,10,11,15,15,16,17,20,20
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,26,29,31,35,44,58,86,105,130,152
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,3,3,6,8,12,14,15,16,17,18
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,2,2,2,2,2,2,2,2


In [7]:
# Row counts for confirmed, recovered, deaths. The tables do not all have the same
# number of rows, so the merges in the next cell (pandas' default inner join) keep
# only the rows whose key columns match in every table.
len(df), len(df_x1), len(df_x2)

(262, 248, 262)

In [8]:
# Join recovered, deaths and confirmed on the columns that identify a region.
# Both merges use the default inner join, so a row survives only if its key
# columns match in all three tables.
key_cols = ["Province/State", "Country/Region", "Lat", "Long"]
df_m = (
    df_x1
    # Date columns clash between recovered and deaths, so they get these suffixes.
    .merge(df_x2, on=key_cols, suffixes=["_recovered", "_deaths"])
    # After the first merge the date columns are already suffixed, so in the output
    # shown below the confirmed columns keep their plain date labels; the suffixes
    # passed here only take effect if a column name still clashes.
    .merge(df, on=key_cols, suffixes=["_removed", "_confirmed"])
)
df_m.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20_recovered,1/23/20_recovered,1/24/20_recovered,1/25/20_recovered,1/26/20_recovered,1/27/20_recovered,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,110,110,120,170,174,237,273,281,299,349
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,186,197,212,223,243,259,277,304,333,361
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,409,454,511,584,716,847,986,1171,1251,1320
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,267,308,334,370,376,390,428,439,466,501
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,4,5,7,7,7,8,8,8,10,14


In [9]:
# Sort the merged frame's value columns into recovered / deaths / confirmed lists,
# using the suffixes added by the merges. Confirmed columns are the ones that carry
# neither the "_recovered" nor the "_deaths" marker.
id_cols = {"Province/State", "Country/Region", "Lat", "Long"}
cols, cols_r, cols_d, cols_c = [], [], [], []
for col_name in df_m.columns:
    if col_name in id_cols:
        continue
    cols.append(col_name)
    is_recovered = "_recovered" in col_name
    is_deaths = "_deaths" in col_name
    if is_recovered:
        cols_r.append(col_name)
    if is_deaths:
        cols_d.append(col_name)
    if not (is_recovered or is_deaths):
        cols_c.append(col_name)

In [10]:
# Remaining (active) cases per day = confirmed - deaths - recovered.
#
# The three columns for a day are looked up by date label (e.g. "1/22/20",
# "1/22/20_deaths", "1/22/20_recovered") instead of by list position, so a date
# missing from one table cannot silently shift the subtraction onto another day.
# The results go into a new frame rather than being added to df_m, which keeps
# df_m unchanged and makes this cell and the previous one safe to re-run.
key_cols = ["Province/State", "Country/Region", "Lat", "Long"]
remain_cols = []
remain_series = {}
unmatched = []
for c in cols_c:
    d, r = c + "_deaths", c + "_recovered"
    if d not in df_m.columns or r not in df_m.columns:
        # No counterpart in the deaths/recovered tables: report it instead of guessing.
        unmatched.append(c)
        continue
    rem = c + "_remain"
    remain_series[rem] = df_m[c] - df_m[d] - df_m[r]
    remain_cols.append(rem)
if unmatched:
    print("No matching deaths/recovered column for:", unmatched)
# Build all derived columns at once (avoids inserting them one by one).
df_m2 = pd.concat([df_m[key_cols], pd.DataFrame(remain_series, index=df_m.index)], axis=1)
df_m2.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20_remain,1/23/20_remain,1/24/20_remain,1/25/20_remain,1/26/20_remain,1/27/20_remain,...,3/27/20_remain,3/28/20_remain,3/29/20_remain,3/30/20_remain,3/31/20_remain,4/1/20_remain,4/2/20_remain,4/3/20_remain,4/4/20_remain,4/5/20_remain
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,104,104,114,164,165,228,257,265,282,327
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,147,156,169,168,176,177,185,198,214,237
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,354,394,449,512,626,728,839,1004,1031,1078
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,263,304,327,352,354,366,403,407,428,457
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,4,5,5,5,4,5,5,5,6,10


In [11]:
# Strip the "_remain" marker so every value column is labelled by its date again.
plain_names = {}
for remain_name in remain_cols:
    plain_names[remain_name] = remain_name.replace("_remain", "")
df_m2 = df_m2.rename(columns=plain_names)
df_m2.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,104,104,114,164,165,228,257,265,282,327
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,147,156,169,168,176,177,185,198,214,237
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,354,394,449,512,626,728,839,1004,1031,1078
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,263,304,327,352,354,366,403,407,428,457
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,4,5,5,5,4,5,5,5,6,10


Now start mapping: reshape the remaining-case table into long format and plot it with kepler.gl.

In [12]:
# From here on `df` is the remaining-cases table; it no longer refers to the
# confirmed-cases frame loaded at the top of the notebook.
df = df_m2
# Label used in the name of the exported HTML file.
case_type = "Remaining"

In [13]:
info_col = ["Province/State", "Country/Region", "Lat", "Long"]

# Keep one value column per calendar day. Column labels look like "1/22/20".
# If several labels parse to the same date, the last one seen wins and the day
# keeps the position of its first appearance. Keying on the date (instead of
# dropping the last list entry) also handles duplicates that are not adjacent.
latest_by_day = {}
for col_str in df.columns:
    if col_str in info_col:
        continue
    latest_by_day[datetime.strptime(col_str, "%m/%d/%y").date()] = col_str

col_dayend = list(latest_by_day)               # datetime.date objects, in column order
col_dayend_str = list(latest_by_day.values())  # the matching column labels
print(col_dayend_str)

['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20', '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20', '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20', '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20', '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20', '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20', '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20', '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20', '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20', '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20', '3/27/20', '3/28/20', '3/29/20', '3/30/20', '3/31/20', '4/1/20', '4/2/20', '4/3/20', '4/4/20', '4/5/20']


In [14]:
# Id columns for the reshape, minus "First confirmed date in country (Est.)" if it is listed.
info_col2 = [c for c in info_col if c != "First confirmed date in country (Est.)"]
df2 = df[info_col2 + col_dayend_str]

# Wide -> long: one row per (region, date). The value column is called
# "confirmed_case" even though `df` holds remaining cases at this point.
# NOTE(review): presumably the map config expects that column name - confirm before renaming.
df3 = (
    df2
    .melt(id_vars=info_col2, value_vars=col_dayend_str, var_name='datetime', value_name='confirmed_case')
    .dropna(subset=["confirmed_case"])
)

# The time slider needs timestamps: turn "1/22/20" into "2020-01-22 00:00".
parsed_days = pd.to_datetime(df3["datetime"], format="%m/%d/%y")
dates = (parsed_days.dt.strftime("%Y-%m-%d") + " 00:00").tolist()
df3 = df3.assign(datetime=dates)

In [15]:
# Build one point geometry per row from its Long (x) / Lat (y) columns and attach
# it to the long-format table.
case_points = gpd.points_from_xy(x=df3["Long"], y=df3["Lat"])
point_gdf = gpd.GeoDataFrame(df3, geometry=case_points)

In [16]:
# Run the saved settings script in this notebook's namespace and use it as the base
# config. The next cell reads a `config` variable, which is presumably defined by
# this script - confirm.
%run map_time_series-remaining.conf.py

In [17]:
# Create the map widget from the loaded config and register the points under the
# dataset name "confirmed_case" - the same id that the time filter's "dataId" uses.
map_1 = KeplerGl(height=900, config=config)
map_1.add_data(data=point_gdf, name="confirmed_case")

# Last expression of the cell: render the widget.
map_1

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': 'confirmed_case', 'name': 'da…

In [20]:
# Add the time slider to the map. The filter is written into the config by hand
# because save_to_html won't recognize the filter part otherwise.
# "dataId" must match the dataset name passed to add_data ("confirmed_case").
# The value is a one-day window in epoch milliseconds:
#   1579651200000 = 2020-01-22 00:00 UTC, 1579737600000 = 2020-01-23 00:00 UTC.
# (The original literal listed the "name" key twice; it is given once here and the
# resulting dict is unchanged.)
map_1.config["config"]["visState"]["filters"] = [{
    "dataId": "confirmed_case",
    "name": "datetime",
    "type": "timeRange",
    "value": [1579651200000, 1579737600000],
}]


In [21]:
# Export a standalone, read-only HTML copy of the map, named after the case type.
html_name = "map_covid19_{}.html".format(case_type)
map_1.save_to_html(file_name=html_name, config=map_1.config, read_only=True)

Map saved to map_covid19_Remaining.html!
