<a href="https://colab.research.google.com/github/wolego2uni/ibm_demo/blob/main/4_10_Map_Visualizations_Choropeth_Maps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Let's do some awesome Choropleth Maps!

In [34]:
# Install plotly-geo plus pinned geopandas / pyshp / shapely releases.
# shapely and pyshp (module name `shapefile`) are imported in a later cell.
# NOTE(review): plotly-geo is unpinned while the other three are pinned to old
# releases; the county choropleth cell further down recorded an ImportError, so
# these pins may not suit the current runtime — confirm.
!pip install plotly-geo
!pip install geopandas==0.3.0
!pip install pyshp==1.2.10
!pip install shapely==1.6.3



In [35]:
# First we import plotly's figure factory
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Read the county unemployment sample table straight from the plotly datasets repo
LAUCNTY16_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv'
df_sample = pd.read_csv(LAUCNTY16_URL)

# Preview the first rows
df_sample.head()

Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force,Employed,Unemployed,Unemployment Rate (%)
0,CN0100100000000,1,1,"Autauga County, AL",2016,25649,24297,1352,5.3
1,CN0100300000000,1,3,"Baldwin County, AL",2016,89931,85061,4870,5.4
2,CN0100500000000,1,5,"Barbour County, AL",2016,8302,7584,718,8.6
3,CN0100700000000,1,7,"Bibb County, AL",2016,8573,8004,569,6.6
4,CN0100900000000,1,9,"Blount County, AL",2016,24525,23171,1354,5.5


The columns (features) we're interested in for our plots are the State FIPS Code and the County FIPS Code. We'll need to zero-pad and combine them into a full five-digit county FIPS code (note: this is a FIPS code, not a ZIP code).

In [36]:
# Zero-pad the state FIPS code to two characters (e.g. 1 -> '01').
# The vectorised .str accessor replaces the per-row Python lambda; the result
# is the same string column.
df_sample['State FIPS Code'] = df_sample['State FIPS Code'].astype(str).str.zfill(2)
df_sample.head()

Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force,Employed,Unemployed,Unemployment Rate (%)
0,CN0100100000000,1,1,"Autauga County, AL",2016,25649,24297,1352,5.3
1,CN0100300000000,1,3,"Baldwin County, AL",2016,89931,85061,4870,5.4
2,CN0100500000000,1,5,"Barbour County, AL",2016,8302,7584,718,8.6
3,CN0100700000000,1,7,"Bibb County, AL",2016,8573,8004,569,6.6
4,CN0100900000000,1,9,"Blount County, AL",2016,24525,23171,1354,5.5


In [37]:
# Zero-pad the county FIPS code to three characters (e.g. 1 -> '001') using the
# vectorised .str accessor instead of a per-row lambda.
df_sample['County FIPS Code'] = df_sample['County FIPS Code'].astype(str).str.zfill(3)

# Full county identifier: padded state code followed by padded county code
# (string concatenation, e.g. '01' + '001' -> '01001').
df_sample['FIPS'] = df_sample['State FIPS Code'] + df_sample['County FIPS Code']
df_sample.head()

Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force,Employed,Unemployed,Unemployment Rate (%),FIPS
0,CN0100100000000,1,1,"Autauga County, AL",2016,25649,24297,1352,5.3,1001
1,CN0100300000000,1,3,"Baldwin County, AL",2016,89931,85061,4870,5.4,1003
2,CN0100500000000,1,5,"Barbour County, AL",2016,8302,7584,718,8.6,1005
3,CN0100700000000,1,7,"Bibb County, AL",2016,8573,8004,569,6.6,1007
4,CN0100900000000,1,9,"Blount County, AL",2016,24525,23171,1354,5.5,1009


In [38]:
# Palette for the map: one hex colour per unemployment-rate bin
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
    "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b",
]

# Bin boundaries: evenly spaced between 1 and 12, one fewer than the colour count
n_endpoints = len(colorscale) - 1
endpts = list(np.linspace(1, 12, n_endpoints))

In [39]:
# Display the bin boundaries (`endpts`) built with np.linspace in the previous cell
endpts

[1.0,
 1.7333333333333334,
 2.466666666666667,
 3.1999999999999997,
 3.933333333333333,
 4.666666666666666,
 5.3999999999999995,
 6.133333333333333,
 6.866666666666666,
 7.6,
 8.333333333333332,
 9.066666666666666,
 9.799999999999999,
 10.533333333333333,
 11.266666666666666,
 12.0]

In [40]:
# Illustration of linspace: `num` evenly spaced values from start to stop, both included
np.linspace(start=1.0, stop=3.0, num=5)

array([1. , 1.5, 2. , 2.5, 3. ])

In [41]:
# Combined FIPS codes as a plain Python list, ready to pass to the choropleth
fips = df_sample['FIPS'].tolist()

In [42]:
# Unemployment rates as a plain Python list, in the same row order as the frame
rate_column = 'Unemployment Rate (%)'
values = df_sample[rate_column].tolist()

# Now ready to plot!

In [43]:
import shapely
import shapefile
import plotly
from plotly.figure_factory._county_choropleth import create_choropleth
import xlrd

In [44]:
# Gather the county choropleth arguments in one place, then build the figure
choropleth_options = dict(
    fips=fips,
    values=values,
    scope=['usa'],
    binning_endpoints=endpts,
    colorscale=colorscale,
    show_state_data=False,
    show_hover=True,
    asp=2.9,  # aspect ratio
    title_text='USA by Unemployment %',
    legend_title='% unemployed',
)
fig = ff.create_choropleth(**choropleth_options)

# Clear the layout template before rendering
fig.layout.template = None
fig.show()

ImportError: ignored

# World Choropleth

In [45]:
import plotly.express as px

# Load the gapminder sample data bundled with plotly express, then keep the 2007 rows
gapminder_all_years = px.data.gapminder()
gapminder = gapminder_all_years.query("year==2007")
gapminder

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
11,Afghanistan,Asia,2007,43.828,31889923,974.580338,AFG,4
23,Albania,Europe,2007,76.423,3600523,5937.029526,ALB,8
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
59,Argentina,Americas,2007,75.320,40301927,12779.379640,ARG,32
...,...,...,...,...,...,...,...,...
1655,Vietnam,Asia,2007,74.249,85262356,2441.576404,VNM,704
1667,West Bank and Gaza,Asia,2007,73.422,4018332,3025.349798,PSE,275
1679,"Yemen, Rep.",Asia,2007,62.698,22211743,2280.769906,YEM,887
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894


In [46]:
# Print the column dtypes and non-null counts of the 2007 gapminder slice
gapminder.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 142 entries, 11 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    142 non-null    object 
 1   continent  142 non-null    object 
 2   year       142 non-null    int64  
 3   lifeExp    142 non-null    float64
 4   pop        142 non-null    int64  
 5   gdpPercap  142 non-null    float64
 6   iso_alpha  142 non-null    object 
 7   iso_num    142 non-null    int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 10.0+ KB


In [47]:
import plotly.express as px

# Rebuild the 2007 slice here so the cell does not depend on earlier cells
gapminder = px.data.gapminder().query("year==2007")

# World choropleth: countries located by the iso_alpha column, shaded by lifeExp,
# with the country name shown on hover
fig = px.choropleth(
    gapminder,
    locations="iso_alpha",
    color="lifeExp",
    hover_name="country",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.show()

In [48]:
import plotly.graph_objects as go
import pandas as pd

# Read the 2011 US agriculture exports table
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

# Single choropleth trace: states come from `code`, colour from `total exports`
exports_trace = go.Choropleth(
    locations=df['code'],                  # spatial coordinates
    z=df['total exports'].astype(float),   # data to be colour-coded
    locationmode='USA-states',             # how entries in `locations` are interpreted
    colorscale='Reds',
    colorbar_title="Millions USD",
)
fig = go.Figure(data=exports_trace)

fig.update_layout(
    title_text='2011 US Agriculture Exports by State',
    geo_scope='usa',  # limit map scope to USA
)

fig.show()

In [49]:
import plotly.graph_objects as go

import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

# Cast every column to str in one call (instead of looping column by column) so
# the columns below can be concatenated into the hover text.
df = df.astype(str)

# Per-state hover text; '<br>' separates the lines
df['text'] = (
    df['state'] + '<br>'
    + 'Beef ' + df['beef'] + ' Dairy ' + df['dairy'] + '<br>'
    + 'Fruits ' + df['total fruits'] + ' Veggies ' + df['total veggies'] + '<br>'
    + 'Wheat ' + df['wheat'] + ' Corn ' + df['corn']
)

fig = go.Figure(data=go.Choropleth(
    locations=df['code'],
    z=df['total exports'].astype(float),  # back to float: the column was cast to str above
    locationmode='USA-states',
    colorscale='Reds',
    autocolorscale=False,
    text=df['text'], # hover text
    marker_line_color='white', # line markers between states
    colorbar_title="Millions USD"
))

fig.update_layout(
    title_text='2011 US Agriculture Exports by State<br>(Hover for breakdown)',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=True, # lakes
        lakecolor='rgb(255, 255, 255)'),
)

# Render explicitly, like the other plotting cells, rather than relying on the
# notebook echoing the return value of update_layout.
fig.show()


In [50]:
import plotly.graph_objects as go
import pandas as pd

# Read the 2014 world GDP table
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# Choropleth trace: countries located by CODE, shaded by GDP, labelled by COUNTRY
gdp_trace = go.Choropleth(
    locations=df['CODE'],
    z=df['GDP (BILLIONS)'],
    text=df['COUNTRY'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix='$',
    colorbar_title='GDP<br>Billions US$',
)
fig = go.Figure(data=gdp_trace)

# Map styling: no frame, no coastlines, equirectangular projection
geo_options = dict(
    showframe=False,
    showcoastlines=False,
    projection_type='equirectangular',
)

# Source credit, positioned in paper coordinates with no arrow
source_note = dict(
    x=0.55,
    y=0.1,
    xref='paper',
    yref='paper',
    text='Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
            CIA World Factbook</a>',
    showarrow=False,
)

fig.update_layout(
    title_text='2014 Global GDP',
    geo=geo_options,
    annotations=[source_note],
)

fig.show()

In [51]:
import plotly.graph_objects as go

import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_ebola.csv')
# (A stray mid-cell `df.head()` was removed here: only the last expression of a
# cell is displayed, so it had no effect.)

# One colour and one legend label per month (June..September -> index 0..3)
colors = ['rgb(239,243,255)', 'rgb(189,215,231)', 'rgb(107,174,214)', 'rgb(33,113,181)']
months = {6:'June', 7:'July', 8:'Aug', 9:'Sept'}

fig = go.Figure()

# scatter chart for outbreak size
# Months are added latest-first (9, 8, 7, 6), so September ends up as fig.data[0].
for i in reversed(range(6, 10)):
    df_month = df.query('Month == %d' %i)
    fig.add_trace(go.Scattergeo(
        lon = df_month['Lon'],
        lat = df_month['Lat'],
        text = df_month['Value'],
        name = months[i],
        marker = dict(
            size=df_month['Value']/50,  # marker size is the Value column divided by 50
            color=colors[i-6],
            line_width=0)
        )
    )

# Relabel the September trace (fig.data[0], see loop order above) with
# "<value> <country>" text drawn below each marker.
df_sept = df.query('Month == 9')
fig.data[0].update(text = df_sept['Value'].map('{:.0f}'.format).astype(str)+' '+\
                        df_sept['Country'],
                     mode = 'markers+text',
                     textposition = 'bottom center')


# Second geo subplot ('geo2'): the September countries filled in solid black
# (both ends of the colorscale are black), with no colour bar.
fig.add_trace(go.Choropleth(
        locationmode='country names',
        locations=df_sept['Country'],
        z=df_sept['Value'],
        text=df_sept['Country'],
        colorscale = [[0,'rgb(0, 0, 0)'],[1,'rgb(0, 0, 0)']],
        autocolorscale = False,
        showscale = False,
        geo = 'geo2'
    ))
# Text-only 'Africa' label placed on the geo2 subplot
fig.add_trace(go.Scattergeo(
        lon = [21.0936],
        lat = [7.1881],
        text = ['Africa'],
        mode = 'text',
        showlegend = False,
        geo = 'geo2'
    ))

fig.update_layout(
    title_text = 'Ebola cases reported by month in West Africa 2014<br> \
Source: <a href="https://data.hdx.rwlabs.org/dataset/rowca-ebola-cases">\
HDX</a>',
    # Main map: restricted to the lon/lat window below, filling the whole figure
    geo = dict(
        resolution=50,
        scope='africa',
        showframe=False,
        showcoastlines=True,
        showland=True,
        landcolor="lightgray",
        countrycolor="white" ,
        coastlinecolor="white",
        projection_type='equirectangular',
        lonaxis_range=[ -15.0, -5.0],
        lataxis_range=[ 0.0, 12.0],
        domain = dict(x=[0, 1], y=[ 0, 1])
    ),
    # Second map: Africa scope, confined to the lower-left 60% x 60% of the figure,
    # with a transparent background
    geo2 = dict(
        scope='africa',
        showframe=False,
        showland=True,
        landcolor="lightgray",
        showcountries=False,
        domain=dict(x=[ 0, 0.6], y=[ 0, 0.6]),
        bgcolor='rgba(255, 255, 255, 0.0)',
    ),
    legend_traceorder = 'reversed'
)

fig.show()