In [1]:
!pip install pandas
!pip install pycountry
!pip install plotly
!pip install jupyter-dash



In [2]:
!pip install gs-quant



In [3]:
import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from gs_quant.session import GsSession, Environment
from gs_quant.data import Dataset
import dash
import datetime
import pandas as pd
import pycountry
import numpy as np

In [4]:
#df_confirm = pd.read_csv('/Users/sarahbost/Downloads/COVID-19-master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

In [5]:
GsSession.use(client_id="ce33aefa85dc4938852db7c3f96a2e1d", client_secret="4cdf63cd871e522e8578c1b14cc60dcd2575802cc4610addd1fdab2ada14e63d", scopes=('read_product_data',))

In [6]:
def get_datasets(datasets):
    num_weeks = 50 # number of weeks to go back
    ds_dict = {}
    end = datetime.date(2020, 12, 12)
    start = end - datetime.timedelta(weeks=num_weeks)
    
    for dataset in datasets:
        try:
            df = Dataset(dataset).get_data(start, end - datetime.timedelta(weeks=(num_weeks-1))) # first week's data
            for i in range(1, num_weeks):
              df=df.append(Dataset(dataset).get_data(end - datetime.timedelta(weeks=(num_weeks-i)), end - datetime.timedelta(weeks=(num_weeks-(i+1))))) # append each subsequent week to dataframe

            keys = [x for x in ['countryId', 'subdivisionId'] if x in df.columns] + ['date']
            val_map = {'newConfirmed': 'totalConfirmed', 'newFatalities': 'totalFatalities'}
            vals = [x for x in list(val_map.keys()) if x in df.columns]

            df_t = df.groupby(keys).sum().groupby(level=0).cumsum().reset_index()[keys + vals].rename(columns=val_map)
            ds_dict[dataset] = df.reset_index().merge(df_t, on=keys, suffixes=('', '_y')).set_index('date')

        except Exception as err:
            print(f'Failed to obtain {dataset} with {getattr(err, "message", repr(err))}')
    return ds_dict

In [7]:
country_datasets = [
    'COVID19_COUNTRY_DAILY_ECDC', 
    'COVID19_COUNTRY_DAILY_WHO',
    'COVID19_COUNTRY_DAILY_WIKI', 
    'COVID19_US_DAILY_CDC'
]
df = get_datasets(country_datasets)

In [8]:
for name, ds in df.items():
    print('{:<30}  {}  {}'.format(name, ds.index[0].date(), ds.index[-1].date())) 

COVID19_COUNTRY_DAILY_ECDC      2019-12-31  2020-10-24
COVID19_COUNTRY_DAILY_WHO       2020-01-03  2020-12-12
COVID19_COUNTRY_DAILY_WIKI      2020-04-06  2020-12-12
COVID19_US_DAILY_CDC            2020-01-22  2020-08-26


In [9]:
def get_country_code(name):
    try:
        return pycountry.countries.lookup(name).alpha_3
    except:
        return None

country_map = {
    'Curacao': 'Curaçao',
    'Congo, Democratic Republic of the': 'Congo, The Democratic Republic of the',
    'Pitcairn Islands': 'Pitcairn',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Virgin Islands (British)': 'British Virgin Islands',
    'The Ivory Coast': "Côte d'Ivoire",
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Reunion': 'Réunion',
    'Republic of Korea': 'Korea, Republic of',
    'Virgin Islands (U.S.)': 'Virgin Islands, U.S.',
    'Iran (Islamic Republic of)': 'Iran, Islamic Republic of',
    'Holy See': 'Holy See (Vatican City State)',
    'Saint Helena': 'Saint Helena, Ascension and Tristan da Cunha',
    'Micronesia (Federated States of)': 'Micronesia, Federated States of',
}
def rename_countries(name):
    if name in country_map:
      return country_map[name]
    return name

df_who = df['COVID19_COUNTRY_DAILY_WHO'].drop(columns=['totalFatalities', 'newFatalities', 'totalConfirmed_y','totalFatalities_y',
       'transmissionClassification', 'daysSinceReported', 'regionName', 'isTerritory', 'updateTime'])

df_who['countryName'] = df_who['countryName'].apply(rename_countries)
df_who = df_who[df_who.countryName != 'Kosova']
df_who = df_who[df_who.countryName != 'Other']

df_who = df_who.groupby(by=["countryName", "date", "countryId"]).agg("sum")
df_who.reset_index(inplace=True)

df_who['iso_alpha_3'] = df_who['countryName'].apply(get_country_code)
date_list=list(df_who.columns)

df_who['date'] = df_who['date'].apply(lambda x: str(x.date()))
pd.set_option('display.min_rows', 300)

print(df_who)

       countryName        date countryId  totalConfirmed  newConfirmed  \
0      Afghanistan  2020-01-03        AF             0.0           0.0   
1      Afghanistan  2020-01-04        AF             0.0           0.0   
2      Afghanistan  2020-01-05        AF             0.0           0.0   
3      Afghanistan  2020-01-06        AF             0.0           0.0   
4      Afghanistan  2020-01-07        AF             0.0           0.0   
5      Afghanistan  2020-01-08        AF             0.0           0.0   
6      Afghanistan  2020-01-09        AF             0.0           0.0   
7      Afghanistan  2020-01-10        AF             0.0           0.0   
8      Afghanistan  2020-01-11        AF             0.0           0.0   
9      Afghanistan  2020-01-12        AF             0.0           0.0   
10     Afghanistan  2020-01-13        AF             0.0           0.0   
11     Afghanistan  2020-01-14        AF             0.0           0.0   
12     Afghanistan  2020-01-15        

In [10]:
fig = px.choropleth(df_who, 
                    locations="iso_alpha_3", 
                    color="totalConfirmed", 
                    hover_name="countryName", 
                    hover_data=["countryId"], 
                    width=1500, height=300,
                    animation_frame="date", 
                    projection="natural earth", 
                    color_continuous_scale="Peach", 
                    range_color=[0,3000000])

In [11]:
mobility_df = pd.read_csv('https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv')

# Only keep rows where all extra location identifiers are null
mobility_df = mobility_df[mobility_df['sub_region_1'].isnull() & mobility_df['sub_region_2'].isnull() & mobility_df['metro_area'].isnull()]

# ISO_3166_2_code and census_fips_code columns are all null
modified_mobility_df = mobility_df.drop(columns=['sub_region_1', 'sub_region_2', 'metro_area', 'iso_3166_2_code', 'census_fips_code'])

# print(mobility_df.shape, modified_mobility_df.shape)
print(modified_mobility_df.iloc[-1])


Columns (4,5) have mixed types.Specify dtype option on import or set low_memory=False.



country_region_code                                           ZW
country_region                                          Zimbabwe
date                                                  2020-12-07
retail_and_recreation_percent_change_from_baseline             5
grocery_and_pharmacy_percent_change_from_baseline             18
parks_percent_change_from_baseline                            17
transit_stations_percent_change_from_baseline                 10
workplaces_percent_change_from_baseline                        6
residential_percent_change_from_baseline                       5
Name: 3411271, dtype: object


In [12]:
print(mobility_df)

        country_region_code        country_region sub_region_1 sub_region_2  \
0                        AE  United Arab Emirates          NaN          NaN   
1                        AE  United Arab Emirates          NaN          NaN   
2                        AE  United Arab Emirates          NaN          NaN   
3                        AE  United Arab Emirates          NaN          NaN   
4                        AE  United Arab Emirates          NaN          NaN   
5                        AE  United Arab Emirates          NaN          NaN   
6                        AE  United Arab Emirates          NaN          NaN   
7                        AE  United Arab Emirates          NaN          NaN   
8                        AE  United Arab Emirates          NaN          NaN   
9                        AE  United Arab Emirates          NaN          NaN   
10                       AE  United Arab Emirates          NaN          NaN   
11                       AE  United Arab Emirates   

In [13]:
scat = px.scatter(x=[0,0], y=[0,0])

In [14]:
app = JupyterDash(__name__)

app.layout = html.Div([
    html.Div([html.H1("Mobility and COVID-19 Cases")], style={'text-align':'center', 'margin-right': '50px'}),
    html.Div([
    html.Div([dcc.Graph(figure=fig, 
              id="map", style= {'margin-left': 150, 'margin-right': 80, 'text-align':'center', 'width': '50%', 'align-items': 'center', 'display': 'flex'}),
   
    html.Div([
    dcc.Dropdown(
        id='dropdown',
        options=[
            {'label': 'Total Cases', 'value': 'total'},
            {'label': 'New Cases', 'value': 'new'},
        ],style={'display': 'flex', 'width': '400px', 'height': '100%', 'backgroundColor': '#FFFFFF'})
    ], style={'display': 'flex', 'align-items': 'center',  'justify-content': 'center'}),
              
    html.Div([], id="scatContainer", style={'display': 'flex', 'width': '400', 'height': '600', 'backgroundColor': '#FFFFFF', 'align-items': 'center', 'justify-content': 'center'}),
        
        
        
], style={'text-align':'center', 'height': '800px', 'backgroundColor': '#FFFFFF', 'color': '#424343', 'margin-left': 50, 'margin-right': 50, 'align-items': 'center', 'justify-content': 'center', 'box-shadow': '8px 8px 8px 8px rgba(30, 30, 30, 0.2)'})
],  style={'text-align':'center', 'backgroundColor': '#FFFFFF', 'color': '#FFFFFF', 'align-items': 'center', 'justify-content': 'center'})
])
@app.callback(
    Output('scatContainer', 'children'),
    [Input('map', 'clickData')])
def update_scat(clickData): 
    if clickData is None: 
        raise dash.exceptions.PreventUpdate()
    print(clickData)
    df_selected_country_mobility = mobility_df.loc[mobility_df['country_region_code'] == clickData['points'][0]['customdata'][0]]
    if df_selected_country_mobility.empty:
        return [html.H1("Mobility data not available")]
    scat = go.Figure()
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['residential_percent_change_from_baseline'], mode="markers", name="Residential"))
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['retail_and_recreation_percent_change_from_baseline'], mode="markers", name="Retail"))
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['grocery_and_pharmacy_percent_change_from_baseline'], mode="markers", name="Grocery"))
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['parks_percent_change_from_baseline'], mode="markers", name="Parks"))
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['transit_stations_percent_change_from_baseline'], mode="markers", name="Transit"))
    scat.add_trace(go.Scatter(x=df_selected_country_mobility['date'], y=df_selected_country_mobility['workplaces_percent_change_from_baseline'], mode="markers", name="Work"))
    scat.update_layout(
        title={
            'text': clickData['points'][0]['hovertext'], 
             'x':0.5,
            'xanchor': 'center', 
        },
        height=375,
        width=1450, 
         margin=dict(
        l=100,
        r=0,
        b=0,
        t=45,
        pad=15
    ),
        xaxis_title="Date",
        yaxis_title="Percent above Baseline",
    )
    return html.Div([dcc.Graph(figure=scat, 
              id="scat")])

@app.callback(
    Output('map', 'figure'),
    [Input('dropdown', 'value')])
def update_map(value): 
    if value is None: 
        raise dash.exceptions.PreventUpdate()
    
    if value == 'new': 
        return px.choropleth(df_who, 
                        locations="iso_alpha_3", 
                        color="newConfirmed", 
                        hover_name="countryName", 
                        width=1500, height=400,
                        hover_data=["countryId"], 
                        animation_frame="date", 
                        projection="natural earth", 
                        color_continuous_scale="Peach", 
                        range_color=[0,30000])

    else: 
        return px.choropleth(df_who, 
                        locations="iso_alpha_3", 
                        color="totalConfirmed", 
                        hover_name="countryName", 
                        hover_data=["countryId"], 
                        width=1500, height=400,
                        animation_frame="date", 
                        projection="natural earth", 
                        color_continuous_scale="Peach", 
                        range_color=[0,3000000])


    
    
app.run_server(mode='jupyterlab', port = 8090, dev_tools_ui=True, debug=True, dev_tools_hot_reload=True, threaded=True)

{'points': [{'curveNumber': 0, 'pointNumber': 222, 'pointIndex': 222, 'location': 'USA', 'z': 0, 'hovertext': 'United States of America', 'customdata': ['US']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 24, 'pointIndex': 24, 'location': 'BOL', 'z': 0, 'hovertext': 'Bolivia', 'customdata': ['BO']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 11, 'pointIndex': 11, 'location': 'AUS', 'z': 0, 'hovertext': 'Australia', 'customdata': ['AU']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 172, 'pointIndex': 172, 'location': 'RUS', 'z': 0, 'hovertext': 'Russian Federation', 'customdata': ['RU']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 37, 'pointIndex': 37, 'location': 'CAN', 'z': 0, 'hovertext': 'Canada', 'customdata': ['CA']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 28, 'pointIndex': 28, 'location': 'BRA', 'z': 0, 'hovertext': 'Brazil', 'customdata': ['BR']}]}
{'points': [{'curveNumber': 0, 'pointNumber': 11, 'pointIndex': 11, 'location': 'AUS', 'z': 0, 'hovertext': 'A