# Choropleth Map
This map shows, for each country, the gap between the percentage of boys and the percentage of girls among children entering primary school. The difference is calculated as % of boys – % of girls, so a positive value means more boys than girls entered. The color bar represents how large the gap is.

### Setup

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.plotly as py
import plotly.graph_objs as go
import plotly

In [2]:
# helper function
def assign_continent(country_code):
    """Return the continent name for a three-letter country code.

    The code is looked up in the notebook-level ``country_to_continent``
    DataFrame (columns ``Three_Letter_Country_Code`` and ``Continent_Name``),
    which must be loaded before this function is called.

    Parameters
    ----------
    country_code : str
        Three-letter country code to look up.

    Returns
    -------
    The ``Continent_Name`` value of the first matching row, or ``None`` when
    no row has that code.

    Raises
    ------
    NameError
        If ``country_to_continent`` has not been defined yet.
    AttributeError / KeyError
        If the lookup table lacks the expected columns.
    """
    # Select the continent names of every row whose code matches.
    matches = country_to_continent.loc[
        country_to_continent.Three_Letter_Country_Code == country_code,
        'Continent_Name',
    ]
    # An unknown code is the only expected "failure"; report it as None
    # explicitly instead of hiding every possible error behind a bare except.
    if matches.empty:
        return None
    return matches.values[0]

### Pre-Processing
1. Filter Columns
2. Rename Indicator Values (to Girls / Boys)
3. Assign a continent to each country
4. Filter out the non-country level rows
5. Pivot

In [3]:
# Load the raw primary-school entry figures and the country/continent lookup.
entry = pd.read_csv('../data/entry_to_primary_edu.csv')
country_to_continent = pd.read_csv('../data/country_continent.csv')

# 1. Keep only the columns used below and drop the 2018 rows.
country_to_continent = country_to_continent[
    ['Three_Letter_Country_Code', 'Continent_Name', 'Country_Name']
]
entry = (
    entry[['Indicator', 'LOCATION', 'Country', 'Time', 'Value']]
    .loc[lambda frame: frame['Time'] != 2018]  # 2018 missing data
)

# 2. Relabel the two long indicator names with short ones. The replacement
#    is applied to the whole frame, wherever these exact strings occur.
entry = (
    entry
    .replace('New entrants to Grade 1 of primary education, female (number)', 'Girls')
    .replace('New entrants to Grade 1 of primary education, male (number)', 'Boys')
)

# 3. Look up the continent for every LOCATION code (None when the code is
#    not in the lookup table).
entry['Continent'] = entry['LOCATION'].apply(assign_continent)

# 4. Keep only country-level rows: those whose LOCATION is one of the
#    three-letter codes present in the lookup table.
countries = country_to_continent['Three_Letter_Country_Code'].unique()
entry_by_country = entry.loc[entry['LOCATION'].isin(countries)]

# 5. Pivot so that each Indicator value becomes its own column of Value,
#    indexed by continent, country code, country name and year.
entry_pivot = entry_by_country.pivot_table(
    values='Value',
    index=['Continent', 'LOCATION', 'Country', 'Time'],
    columns='Indicator',
)

### Calculations

In [4]:
# Mean number of new entrants per country: rows of the pivot that share the
# same Country and LOCATION are averaged together.
# NOTE: the variable name (a typo for "country") is kept because the plotting
# cell reads `entry_by_contry`.
entry_by_contry = entry_pivot.groupby(['Country', 'LOCATION']).mean()

# Difference between the boys' and girls' shares of new entrants, in
# percentage points (% of boys - % of girls).
# The columns are selected by label rather than by position (x[0], x[1]):
# integer keys on a label-indexed row are deprecated in pandas and silently
# depend on the order of the pivoted columns. The arithmetic is also done on
# whole columns at once instead of row by row.
boys = entry_by_contry['Boys']
girls = entry_by_contry['Girls']
total_entrants = boys + girls
entry_by_contry['Dif%'] = (boys / total_entrants - girls / total_entrants) * 100

# Keep only the columns needed for the map.
entry_by_contry = entry_by_contry.reset_index()[['Country', 'LOCATION', 'Dif%']]

entry_by_contry.head()

Indicator,Country,LOCATION,Dif%
0,Burundi,BDI,1.218825
1,Benin,BEN,3.897341
2,Burkina Faso,BFA,4.722228
3,Botswana,BWA,4.86614
4,Central African Republic,CAF,10.225004


### Plot

In [5]:
# One choropleth trace: countries are matched by their LOCATION code and
# colored by the boys-vs-girls percentage difference.
data = [go.Choropleth(
    locations = entry_by_contry['LOCATION'],
    z = entry_by_contry['Dif%'],
    text = entry_by_contry['Country'],  # country name attached to each location
    autocolorscale = True,
    reversescale = True,
    marker = go.choropleth.Marker(
        # thin light-grey country borders
        line = go.choropleth.marker.Line(
            color = 'rgb(180,180,180)',
            width = 0.5
        )),
    colorbar = go.choropleth.ColorBar(
        # The percent sign belongs after the number ("5%"); as a tick prefix
        # it rendered as "%5".
        ticksuffix = '%',
        title = '% Difference of Boys vs Girls'),
)]

# World map layout: no frame around the map, coastlines drawn,
# equirectangular projection.
layout = go.Layout(
    title = go.layout.Title(
        text = 'Difference in Primary School Entry for Boys vs. Girls'
    ),
    geo = go.layout.Geo(
        showframe = False,
        showcoastlines = True,
        projection = go.layout.geo.Projection(
            type = 'equirectangular'
        )
    ),
)

fig = go.Figure(data = data, layout = layout)
# `py` is the `plotly.plotly` module imported at the top of the notebook.
# NOTE(review): this presumably renders through Plotly's online service under
# the given filename and so needs configured credentials; confirm for the
# installed plotly version.
py.iplot(fig, filename = 'ChoroplethMap')


Consider using IPython.display.IFrame instead

