# Choropleth Maps with Folium

In [1]:
import ast
import json
import os
import re

import branca
import folium
import numpy as np
import pandas as pd

## Import Data

In [2]:
# Sample tabular data: county-level figures and state unemployment.
state_data = pd.read_csv('data/US_Unemployment_Oct2012.csv')
county_data = pd.read_csv('data/us_county_data.csv', dtype={'FIPS_Code': int})

# Paths to the geometry files used as choropleth boundaries.
# (An earlier version pointed county_geo at 'us_counties_20m_topo.json';
# 'us_counties.json' is used instead.)
county_geo = os.path.join('data', 'us_counties.json')
state_geo = os.path.join('data', 'us_states.json')

In [3]:
# Pesticide estimates for 2015. Both FIPS columns are read as strings rather
# than numbers so they can be concatenated into a single county code later.
filepath = os.path.join('data', 'EPest_county_estimates_2015.txt')
pesticides = pd.read_table(
    filepath,
    dtype={'STATE_FIPS_CODE': str, 'COUNTY_FIPS_CODE': str},
)

## Quick Exploratory Data Analysis

In [4]:
# Summarise column dtypes, non-null counts and memory usage.
pesticides.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 377011 entries, 0 to 377010
Data columns (total 6 columns):
COMPOUND            377011 non-null object
YEAR                377011 non-null int64
STATE_FIPS_CODE     377011 non-null object
COUNTY_FIPS_CODE    377011 non-null object
EPEST_LOW_KG        254831 non-null float64
EPEST_HIGH_KG       377011 non-null float64
dtypes: float64(2), int64(1), object(3)
memory usage: 17.3+ MB


In [5]:
# Cast every column to Categorical so describe() reports count/unique/top/freq
# for all columns, the numeric ones included.
pesticides.apply(pd.Categorical).describe()

Unnamed: 0,COMPOUND,YEAR,STATE_FIPS_CODE,COUNTY_FIPS_CODE,EPEST_LOW_KG,EPEST_HIGH_KG
count,377011,377011,377011,377011,254831.0,377011.0
unique,403,1,48,266,31400.0,37092.0
top,GLYPHOSATE,2015,48,1,0.0,0.0
freq,3058,377011,27607,5988,19654.0,22395.0


In [6]:
# Inspect five randomly sampled rows (no seed is set, so rows differ per run).
pesticides.sample(5)

Unnamed: 0,COMPOUND,YEAR,STATE_FIPS_CODE,COUNTY_FIPS_CODE,EPEST_LOW_KG,EPEST_HIGH_KG
30900,BACILLUS CEREUS,2015,48,143,,0.0
78002,COPPER HYDROXIDE,2015,26,11,266.2,278.0
93642,CYHALOTHRIN-LAMBDA,2015,31,183,1.9,36.9
341835,TERBUFOS,2015,42,15,,4.0
31627,BACILLUS SUBTILIS,2015,56,13,0.7,0.7


In [7]:
# First rows recorded for a single compound ('2,4-D').
pesticides.groupby('COMPOUND').get_group('2,4-D').head()

Unnamed: 0,COMPOUND,YEAR,STATE_FIPS_CODE,COUNTY_FIPS_CODE,EPEST_LOW_KG,EPEST_HIGH_KG
48,"2,4-D",2015,1,1,3243.5,3363.2
49,"2,4-D",2015,1,3,2178.0,3435.9
50,"2,4-D",2015,1,5,5650.9,5825.6
51,"2,4-D",2015,1,7,949.7,957.6
52,"2,4-D",2015,1,9,9507.1,9591.2


## Data Munging

In [8]:
# Build one county identifier per row by concatenating the state and county
# codes. The int -> str round trip strips leading zeros, because the ids in
# the JSON geometry file carry none. (Zero-padding the feature ids in the
# .json would also work, but is more effort.)
# NOTE(review): assumes the county code is zero-padded to a fixed width in the
# source file, otherwise the concatenation would be ambiguous — confirm.
pesticides['FIPS_Code'] = (
    (pesticides['STATE_FIPS_CODE'] + pesticides['COUNTY_FIPS_CODE'])
    .astype(int)
    .astype(str)
)


# take a peek at 2,4-D usage in Boulder County
pesticides.loc[(pesticides['FIPS_Code'] == '8013') & (pesticides['COMPOUND'] == '2,4-D')]

Unnamed: 0,COMPOUND,YEAR,STATE_FIPS_CODE,COUNTY_FIPS_CODE,EPEST_LOW_KG,EPEST_HIGH_KG,FIPS_Code
259,"2,4-D",2015,8,13,592.9,593.0,8013


In [9]:
# Take a deeper look at the JSON geometry file.
# json.load is used instead of ast.literal_eval: JSON is not Python literal
# syntax (true/false/null would fail to parse), and the `with` block
# guarantees the file handle is closed.
with open('data/us_counties.json') as geo_file:
    geo_data = json.load(geo_file)

# ids present in the geometry file vs. ids present in the pesticide data
geo_fips = [record['id'] for record in geo_data['features']]
data_fips = pesticides.FIPS_Code.tolist()

# ensure that the FIPS_Code values match the ids used in the geo_data JSON;
# counties without a matching feature id cannot be drawn on the map.
intersection = set(geo_fips).intersection(data_fips)

print("{} of {} FIPS codes match".format(len(intersection), len(set(data_fips))))

3056 of 3063 FIPS codes match


In [10]:
# Two-letter state abbreviation -> two-digit state FIPS code.
state_codes = {
    'WA': '53', 'DE': '10', 'DC': '11', 'WI': '55', 'WV': '54', 'HI': '15',
    'FL': '12', 'WY': '56', 'PR': '72', 'NJ': '34', 'NM': '35', 'TX': '48',
    'LA': '22', 'NC': '37', 'ND': '38', 'NE': '31', 'TN': '47', 'NY': '36',
    'PA': '42', 'AK': '02', 'NV': '32', 'NH': '33', 'VA': '51', 'CO': '08',
    'CA': '06', 'AL': '01', 'AR': '05', 'VT': '50', 'IL': '17', 'GA': '13',
    'IN': '18', 'IA': '19', 'MA': '25', 'AZ': '04', 'ID': '16', 'CT': '09',
    'ME': '23', 'MD': '24', 'OK': '40', 'OH': '39', 'UT': '49', 'MO': '29',
    'MN': '27', 'MI': '26', 'RI': '44', 'KS': '20', 'MT': '30', 'MS': '28',
    'SC': '45', 'KY': '21', 'OR': '41', 'SD': '46'
}

# Invert the lookup (FIPS code -> abbreviation) and use it to add a 'State'
# column holding the abbreviation for each row's state code.
state_fips_to_name = dict(zip(state_codes.values(), state_codes.keys()))
pesticides['State'] = (
    pesticides['STATE_FIPS_CODE']
    .astype(str)
    .replace(state_fips_to_name)
)

In [11]:
# Same Boulder County / 2,4-D row as before, now with the 'State' column added.
pesticides.loc[(pesticides['FIPS_Code'] == '8013') & (pesticides['COMPOUND'] == '2,4-D')]

Unnamed: 0,COMPOUND,YEAR,STATE_FIPS_CODE,COUNTY_FIPS_CODE,EPEST_LOW_KG,EPEST_HIGH_KG,FIPS_Code,State
259,"2,4-D",2015,8,13,592.9,593.0,8013,CO


## Create a Choropleth Map

Let's start with a state-level choropleth with sample unemployment data

In [12]:
# Base map on which the choropleth layer is drawn.
m = folium.Map(location=[48, -102], zoom_start=3)

# Shade each state by the 'Unemployment' column of the sample data; rows are
# matched to geometry features through the 'State' column and `feature.id`.
# NOTE(review): `Map.choropleth` appears to be deprecated/removed in newer
# folium releases in favour of `folium.Choropleth(...).add_to(m)` — confirm
# against the installed folium version.
m.choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=state_data,
    columns=['State', 'Unemployment'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate (%)'
)


# Add the layer toggle control to the map.
folium.LayerControl().add_to(m)

# Last expression of the cell: renders the map inline.
m

Now let's swap in our data...

In [13]:
# Total high-estimate pesticide mass per state, converted from kg to metric
# tonnes. The built-in groupby sum replaces a per-group Python lambda passed
# to apply(); the resulting columns ('State', 'EPEST_HIGH_KG') are unchanged.
# NOTE: this rebinds `state_data`, replacing the unemployment sample loaded at
# the top of the notebook, so the previous map cell cannot be re-run after
# this one without reloading that sample.
state_data = (
    pesticides
    .groupby('State')['EPEST_HIGH_KG']
    .sum()
    .div(1000)  # kg -> metric tonnes
    .reset_index()
)

m = folium.Map(location=[48, -102], zoom_start=3)

# Same state-level layer as before, now keyed on the aggregated pesticide data.
m.choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=state_data,
    columns=['State', 'EPEST_HIGH_KG'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Pesticide Application in Metric Tonnes'
)


folium.LayerControl().add_to(m)

m

In [14]:
# Five states with the largest values in the aggregated EPEST_HIGH_KG column.
state_data.sort_values(by='EPEST_HIGH_KG', ascending=False).head()

Unnamed: 0,State,EPEST_HIGH_KG
3,CA,65018.9316
44,WA,27771.1683
7,FL,27206.4447
11,IL,27179.7624
9,IA,26496.2135


That's sweet! Now let's get higher resolution by using county-level data.  
Note how I swap out the `geo_data` filepath.

In [15]:
# Total high-estimate pesticide mass per county, converted from kg to thousand
# metric tonnes (1e6 kg). The built-in groupby sum replaces a per-group Python
# lambda passed to apply(); the resulting columns are unchanged.
# NOTE: this rebinds `county_data`, replacing the sample frame loaded at the
# top of the notebook.
county_data = (
    pesticides
    .groupby('FIPS_Code')['EPEST_HIGH_KG']
    .sum()
    .div(10**6)  # kg -> thousand metric tonnes
    .reset_index()
)

m = folium.Map(location=[38, -100], zoom_start=4)

# County-level layer: the geometry file is swapped for the county one, and six
# logarithmically spaced thresholds from 0.1 to 10 are used for the colour bins.
m.choropleth(
    geo_data=county_geo,
    name='choropleth',
    data=county_data,
    columns=['FIPS_Code', 'EPEST_HIGH_KG'],
    key_on='feature.id',
    fill_color='YlGn',
    threshold_scale=np.logspace(-1, 1, 6, endpoint=True).tolist(),
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Pesticide Application in Thousand Metric Tonnes'
)

folium.LayerControl().add_to(m)

m

Up until now, we have been aggregating (summing) across all pesticides in our database. But perhaps we want to look at one pesticide in particular. Let's create a choropleth map for each of the top twelve pesticides by mass applied nationally (known here as the dirty dozen).

In [16]:
# The twelve compounds with the largest summed EPEST_HIGH_KG, largest first.
dirty_dozen = (
    pesticides
    .groupby('COMPOUND')['EPEST_HIGH_KG']
    .sum()
    .sort_values(ascending=False)
    .head(12)
)

# Rows of the full table that belong to one of those twelve compounds.
pesticides_top_12 = pesticides[pesticides['COMPOUND'].isin(dirty_dozen.index)]

In [17]:
import numpy as np
import datetime
from IPython.display import HTML, Image

def _show_block(src, width=800, height=500):
    """Wrap ``src`` in an ``<iframe>`` tag and return it as an ``HTML`` object.

    Parameters
    ----------
    src : value placed in the iframe's ``src`` attribute (inserted as-is,
        without escaping).
    width, height : numbers formatted with ``%i`` into the iframe's
        ``width`` and ``height`` attributes.
    """
    iframe_fields = (src, width, height)
    return HTML("<iframe src='%s' width=%i height=%i></iframe>" % iframe_fields)

def _subset_dataframe(df, col_name, value):
    return df[df[col_name] == value]
    
def _create_choropleth(geo_data, data, feature_col, id_col, legend_title):
    """Build a folium map with one choropleth layer and a layer control.

    Parameters
    ----------
    geo_data : geometry passed through to ``Map.choropleth`` as ``geo_data``.
    data : table passed through to ``Map.choropleth`` as ``data``.
    feature_col : name of the column in ``data`` whose values are shaded.
    id_col : name of the column in ``data`` matched against ``feature.id``.
    legend_title : text used as the layer's ``legend_name``.

    Returns
    -------
    The ``folium.Map`` the layer and control were added to.
    """
    layer_options = dict(
        geo_data=geo_data,
        name='choropleth',
        data=data,
        columns=[id_col, feature_col],
        key_on='feature.id',
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_title,
    )
    base_map = folium.Map(location=[38, -100], zoom_start=4.2)
    base_map.choropleth(**layer_options)
    folium.LayerControl().add_to(base_map)
    return base_map

def customize_choropleth(
    geo_data=county_geo,
    data=pesticides, 
    feature='EPEST_HIGH_KG',
    identifier='FIPS_Code',
    select_by='COMPOUND', 
    select_value='2,4-D',
    save=False
):
    """Draw a choropleth for the rows of ``data`` where ``select_by == select_value``.

    Parameters
    ----------
    geo_data : geometry handed to ``_create_choropleth``. The default is the
        value of ``county_geo`` at the time this function is defined.
    data : table to filter. The default is the ``pesticides`` object bound at
        the time this function is defined.
    feature : column whose values are shaded.
    identifier : column matched against the geometry's feature ids.
    select_by : column used to filter ``data``.
    select_value : value of ``select_by`` to keep; also used in the legend
        title and in the output file name.
    save : when true, write the map to ``maps/`` as HTML and return the file
        name instead of the map.

    Returns
    -------
    The map object when ``save`` is false, otherwise the path of the saved
    HTML file as a string.
    """
    legend_title = '{} Application in KG Per Annum'.format(select_value)
    subset_data = _subset_dataframe(data, select_by, select_value)
    m = _create_choropleth(geo_data, subset_data, feature, identifier, legend_title)
    if not save:
        return m

    # exist_ok=True avoids the check-then-create race of exists() + mkdir().
    os.makedirs('maps', exist_ok=True)

    # A path separator inside select_value would otherwise make the file name
    # point into a sub-directory that does not exist; names without one are
    # produced exactly as before.
    safe_title = legend_title.replace(" ", "_")
    for separator in ('/', os.sep):
        safe_title = safe_title.replace(separator, '_')

    file_name = 'maps/choropleth_{}.html'.format(safe_title)
    m.save(file_name)
    return file_name

In [18]:
# Map a single compound, keeping the defaults for every other argument;
# with save=False the map object is returned and displayed as the cell output.
customize_choropleth(select_value='ATRAZINE')

In [19]:
# Save one HTML map per compound in `dirty_dozen` and collect the returned
# file names (with save=True, customize_choropleth returns a path, not a map).
# A comprehension is used so the notebook-level `m` (the folium map from the
# earlier cells) is not rebound to a file-name string by the loop.
maps = [
    customize_choropleth(select_value=pesticide_name, save=True)
    for pesticide_name in dirty_dozen.index
]

In [20]:
# Embed the fourth saved HTML map (maps[3] is a file name) in an iframe.
_show_block(maps[3])

## Congratulations You Made It!

As an aside, here are some helper functions for creating a log scale for a folium choropleth.

In [21]:
def _get_base10_exponent(f):
    return int(np.floor(np.log10(abs(f)))) if f != 0 else 0

def _get_log10_scale(data, n_bins):
    """Return ``n_bins`` logarithmically spaced values derived from ``data``.

    The minimum of ``data`` is rounded down and the maximum rounded up; their
    base-10 exponents (via ``_get_base10_exponent``) are the start and stop of
    ``np.logspace``. The stop exponent itself is excluded (``endpoint=False``).

    Returns
    -------
    A plain Python list of floats.
    """
    lowest = np.floor(np.min(data))
    highest = np.ceil(np.max(data))
    start_exponent = _get_base10_exponent(lowest)
    stop_exponent = _get_base10_exponent(highest)
    thresholds = np.logspace(start_exponent, stop_exponent, n_bins, endpoint=False)
    return thresholds.tolist()

# Example: six log-spaced thresholds for the high-estimate column.
_get_log10_scale(pesticides.EPEST_HIGH_KG, n_bins=6)

[1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0]