# Bubble Map
This bubble map shows which countries have the highest youth illiteracy rates. The Plotly version of the map is interactive: hover over a bubble to see the country’s name and illiteracy rate.

### Setup

In [1]:
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
import geopy.geocoders as geocoders

In [2]:
# Nominatim (OpenStreetMap) geocoder client from geopy; get_lat_lon uses it
# to turn country names into coordinates.
# NOTE(review): "my_app" looks like a placeholder user agent — presumably it
# should identify this project; confirm against the Nominatim usage policy.
geolocator = geocoders.Nominatim(user_agent="my_app")

# helper functions
def assign_continent(country_code, mapping=None):
    """Return the continent name for a 3 letter country code.

    Parameters
    ----------
    country_code : str
        Code to look up in the ``Three_Letter_Country_Code`` column.
    mapping : pandas.DataFrame, optional
        Lookup table with ``Three_Letter_Country_Code`` and
        ``Continent_Name`` columns. Defaults to the notebook-level
        ``country_to_continent`` frame.

    Returns
    -------
    The ``Continent_Name`` of the first matching row, or ``None`` when the
    code does not appear in the table (e.g. aggregate / regional codes).

    Raises
    ------
    AttributeError, KeyError
        If the lookup table lacks the expected columns. These used to be
        swallowed by a bare ``except`` and silently turned into ``None``.
    """
    if mapping is None:
        mapping = country_to_continent

    is_match = mapping.Three_Letter_Country_Code == country_code
    continents = mapping.loc[is_match, 'Continent_Name']

    # "No such code" is the only expected failure; handle it explicitly
    # instead of hiding every possible error behind `except:`.
    if continents.empty:
        return None
    return continents.iloc[0]
    
def get_lat_lon(c, geocoder=None):
    """Geocode a place name into a ``(latitude, longitude)`` pair.

    Parameters
    ----------
    c : str
        Place name to look up (the notebook passes country names).
    geocoder : object, optional
        Any object exposing ``geocode(query)`` that returns either ``None``
        or a result with ``latitude`` / ``longitude`` attributes. Defaults
        to the notebook-level ``geolocator``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` on success. When the lookup fails or finds
        nothing, the name is printed and ``(None, None)`` is returned so the
        caller can skip it.
    """
    if geocoder is None:
        geocoder = geolocator

    try:
        location = geocoder.geocode(c)
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt /
        # SystemExit still propagate and the geocoding loop can be stopped.
        location = None

    if location is None:
        # report which name could not be geocoded
        print(c)
        return None, None
    return (location.latitude, location.longitude)

### Pre-Processing
1. Filter columns
2. Rename the indicator values (to `Girls` / `Boys`)
3. Assign a continent to each country
4. Filter out the non-country-level rows

In [3]:
# load the continent lookup table and the raw literacy figures
country_to_continent = pd.read_csv('../data/country_continent.csv')
literacy_rate = pd.read_csv('../data/youth_literacy_rate.csv')

# steps 1 + 2: keep only the columns we need, then shorten the two verbose
# indicator labels to 'Girls' / 'Boys' (values are relabelled, not columns)
literacy_rate = (
    literacy_rate[['Indicator', 'LOCATION', 'Country', 'Time', 'Value']]
    .replace('Youth literacy rate, population 15-24 years, female (%)', 'Girls')
    .replace('Youth literacy rate, population 15-24 years, male (%)', 'Boys')
)

# step 3: look up the continent for every LOCATION code
# (codes missing from the lookup table get None)
literacy_rate = literacy_rate.assign(
    Continent=literacy_rate['LOCATION'].apply(assign_continent)
)

# step 4: drop rows whose LOCATION is not a known country code
countries = country_to_continent['Three_Letter_Country_Code'].unique()
is_country = literacy_rate['LOCATION'].isin(countries)
literacy_rate_by_country = literacy_rate[is_country]

### Calculations

In [4]:
# average literacy rate per country (mean over all of that country's rows);
# the illiteracy rate is derived later, when plotting, as 100 - Value
literacy_rate_by_country = (
    literacy_rate_by_country
    .groupby(['Country', 'LOCATION', 'Continent'])
    # numeric_only=True keeps just Time and Value. The string column
    # 'Indicator' was silently dropped by old pandas, but pandas >= 2.0
    # raises a TypeError when asked to average it.
    .mean(numeric_only=True)
)

literacy_rate_by_country.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Time,Value
Country,LOCATION,Continent,Unnamed: 3_level_1,Unnamed: 4_level_1
Angola,AGO,Africa,2014.0,77.723755
Argentina,ARG,South America,2015.0,99.45643
Azerbaijan,AZE,Europe,2015.0,99.945877
Bahrain,BHR,Asia,2016.0,94.235375
Bangladesh,BGD,Asia,2015.5,89.64901


### Plot

In [5]:
# color palette: one hex color per continent, used for the bubble fill
color = {
    'Asia': '#e41a1c',           # red
    'North America': '#377eb8',  # blue
    'Europe': '#FF69B4',         # pink
    'South America': '#984ea3',  # purple
    'Africa': '#ff7f00',         # orange
    'Oceania': '#4daf4a',        # green
}

In [7]:
# figure layout: title, legend and the base map the bubbles are drawn on
layout = go.Layout(
    title=go.layout.Title(text='Illiteracy Rates Around the World'),
    showlegend=True,
    geo=go.layout.Geo(
        # base-map styling
        showland=True,
        landcolor="#e8e8e8",
        showcountries=True,
        coastlinewidth=1,
        resolution=110,
    ),
)

# objects to be plotted: one Scattergeo trace per country, so that every
# country gets its own legend entry and hover label
countries = list()
# the index is the (Country, LOCATION, Continent) group key
for (name, _code, continent), row in literacy_rate_by_country.iterrows():
    lat, lon = get_lat_lon(name)
    if lat is None:
        # could not be geocoded -> nothing to place on the map
        continue

    illiteracy = 100 - row['Value']
    # Clamp the marker size to [5, 40] so tiny rates stay visible and huge
    # ones do not swamp the map. The previous rule set every value above 30
    # to 40, which made a 31% bubble larger than a 29% one.
    val = min(max(illiteracy, 5), 40)

    country = go.Scattergeo(
        lat=[lat],
        lon=[lon],
        text=illiteracy,
        mode='markers',
        marker=go.scattergeo.Marker(
            size=val,
            sizemode='area',
            opacity=0.65,
            # fall back to neutral grey for a continent missing from the
            # palette instead of aborting the whole plot with a KeyError
            color=color.get(continent, '#999999'),
        ),
        name=name,
    )
    countries.append(country)

# plot
fig = go.Figure(data=countries, layout=layout)
py.iplot(fig, filename='BubbleMap')


Consider using IPython.display.IFrame instead

