# Visualization Map

## Import Libraries

In [0]:
# Upgrade the plotting library first (this is an IPython magic, not Python code)
%pip install --upgrade plotly 

# Import libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import plotly.express as px # for plotting maps
from google.colab import files # for downloading files from the notebook (Colab only)
import plotly.graph_objects as go
from urllib.request import urlopen
import json

# Magic commands for rendering high-quality plots in the notebook
# (these are IPython magics, not Python code)
%matplotlib inline 
%config InlineBackend.figure_format = "retina"

Requirement already up-to-date: plotly in /usr/local/lib/python3.6/dist-packages (4.8.1)


In [0]:
# State-level COVID-19 counts from the NYT repository; the URL is pinned to a
# specific commit so the notebook always loads the same snapshot.
data_url = "https://raw.githubusercontent.com/nytimes/covid-19-data/e8945b9d8bfd524a8083f6935e6556162a90b10f/live/us-states.csv"

# Load the CSV and keep only the rows that contain no missing (NaN) values.
df = pd.read_csv(data_url).dropna(axis=0)
df.head(10)

Unnamed: 0,date,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2020-06-04,Alabama,1,19072,653,18766.0,651.0,306.0,2.0
1,2020-06-04,Alaska,2,524,10,524.0,10.0,0.0,0.0
3,2020-06-04,Arkansas,5,8425,151,8425.0,151.0,0.0,0.0
4,2020-06-04,California,6,122596,4480,122596.0,4480.0,0.0,0.0
8,2020-06-04,District of Columbia,11,9120,475,9120.0,475.0,0.0,0.0
9,2020-06-04,Florida,12,60175,2606,60175.0,2606.0,0.0,0.0
10,2020-06-04,Georgia,13,47528,2116,47528.0,2116.0,0.0,0.0
11,2020-06-04,Guam,66,1148,6,1140.0,6.0,8.0,0.0
13,2020-06-04,Idaho,16,2990,83,2709.0,78.0,281.0,5.0
14,2020-06-04,Illinois,17,125114,5772,125114.0,5772.0,0.0,0.0


## Data Preprocessing

### State Mapping

The plotly function we will be using requires state abbreviations as input, so we have to add a column of state abbreviations to the dataframe.

In [0]:
# Lookup table: full state/territory name -> two-letter postal abbreviation
us_state_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "American Samoa": "AS",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "District of Columbia": "DC",
    "Florida": "FL",
    "Georgia": "GA",
    "Guam": "GU",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Northern Mariana Islands": "MP",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Puerto Rico": "PR",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virgin Islands": "VI",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

# Translate every row's state name through the lookup table and store the
# result as a new 'abbrev' column (names absent from the table become NaN).
df['abbrev'] = df.state.map(us_state_abbrev)
df.head(10)

Unnamed: 0,date,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths,abbrev
0,2020-06-04,Alabama,1,19072,653,18766.0,651.0,306.0,2.0,AL
1,2020-06-04,Alaska,2,524,10,524.0,10.0,0.0,0.0,AK
3,2020-06-04,Arkansas,5,8425,151,8425.0,151.0,0.0,0.0,AR
4,2020-06-04,California,6,122596,4480,122596.0,4480.0,0.0,0.0,CA
8,2020-06-04,District of Columbia,11,9120,475,9120.0,475.0,0.0,0.0,DC
9,2020-06-04,Florida,12,60175,2606,60175.0,2606.0,0.0,0.0,FL
10,2020-06-04,Georgia,13,47528,2116,47528.0,2116.0,0.0,0.0,GA
11,2020-06-04,Guam,66,1148,6,1140.0,6.0,8.0,0.0,GU
13,2020-06-04,Idaho,16,2990,83,2709.0,78.0,281.0,5.0,ID
14,2020-06-04,Illinois,17,125114,5772,125114.0,5772.0,0.0,0.0,IL


### Population Mapping

Next, we attach each state's population to the case data by merging the two tables on the state FIPS code.

In [0]:
# Population Mapping

pop_url = "https://raw.githubusercontent.com/ylin910095/data_visualization_2020/master/csv/us_population_states/population_state.csv"
# Read with pandas' default encoding. NOTE(review): the earlier comment here
# claimed a special encoding was needed, but no `encoding=` argument was ever
# passed; add one only if this read fails with a decoding error.
pop_df = pd.read_csv(pop_url)

# Build the population table with non-mutating, chained calls. The previous
# version used rename(inplace=True) on a frame derived from an iloc slice,
# which is the pattern that can trigger pandas' SettingWithCopyWarning.
state_pop_df = (
    pop_df
    .iloc[5:]  # skip the first five rows -- presumably non-state summary rows; TODO confirm
    .loc[:, ['NAME', 'STATE', 'POPESTIMATE2019']]  # only retain those three columns
    .rename(columns={'STATE': 'fips'})  # the merge key must have the same name in both tables
)

# Default (inner) merge on the FIPS code: rows of df whose fips value has no
# match in the population table are dropped from the result.
# NAME is dropped before merging because df already carries the state name.
merge_df = pd.merge(df, state_pop_df.drop(columns=['NAME']), on='fips')
merge_df.head(5)

Unnamed: 0,date,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths,abbrev,POPESTIMATE2019
0,2020-06-04,Alabama,1,19072,653,18766.0,651.0,306.0,2.0,AL,4903185
1,2020-06-04,Alaska,2,524,10,524.0,10.0,0.0,0.0,AK,731545
2,2020-06-04,Arkansas,5,8425,151,8425.0,151.0,0.0,0.0,AR,3017804
3,2020-06-04,California,6,122596,4480,122596.0,4480.0,0.0,0.0,CA,39512223
4,2020-06-04,District of Columbia,11,9120,475,9120.0,475.0,0.0,0.0,DC,705749


## Choropleth Maps

See https://plotly.com/python/choropleth-maps/ for examples.

In [0]:
# Choropleth of the 2019 population estimate for every state in merge_df
fig = go.Figure(data=go.Choropleth(
    locations=merge_df['abbrev'],  # with locationmode='USA-states', locations must be state abbreviations
    z=merge_df['POPESTIMATE2019'].astype(float),  # value that drives the colour of each state
    locationmode='USA-states',  # how the entries in `locations` are interpreted
    colorscale='Blues',
    colorbar_title="population",
))

fig.update_layout(
    title_text='US Population by state',
    geo_scope='usa',  # limit map scope to US
)

fig.show()

# Uncomment these two lines to download the data visualization
# (left disabled, like the other map cells, so that running the notebook does
# not write a file and trigger a Colab download on every execution)
#fig.write_html("dataviz.html")
#files.download("dataviz.html")

In [0]:
# Choropleth trace: one colour value (confirmed case count) per state
cases_trace = go.Choropleth(
    locationmode='USA-states',  # `locations` holds state abbreviations
    locations=merge_df['abbrev'],
    z=merge_df['confirmed_cases'].astype(float),
    colorscale='Blues',
    colorbar_title="confirmed cases",
)

fig = go.Figure(data=cases_trace)

# Title the figure and restrict the base map to the United States
fig.update_layout(
    title={'text': 'COVID-19 total confirmed cases'},
    geo={'scope': 'usa'},
)

fig.show()

# To download the visualization, uncomment the two lines below
#fig.write_html("dataviz.html")
#files.download("dataviz.html")

In [0]:
# Choropleth trace: confirmed cases divided by the 2019 population estimate
per_capita_trace = go.Choropleth(
    locationmode='USA-states',  # `locations` holds state abbreviations
    locations=merge_df['abbrev'],
    z=merge_df['confirmed_cases'].div(merge_df['POPESTIMATE2019']).astype(float),
    colorscale='Blues',
    colorbar_title="confirmed cases per capita",
)

fig = go.Figure(data=per_capita_trace)

# Title the figure and restrict the base map to the United States
fig.update_layout(
    title={'text': 'COVID-19 total confirmed cases per capita'},
    geo={'scope': 'usa'},
)

fig.show()

# To download the visualization, uncomment the two lines below
#fig.write_html("dataviz.html")
#files.download("dataviz.html")