In [290]:
import pandas as pd
import json
import folium
import os
import requests
from bs4 import BeautifulSoup
# Warning below have been added to the first list
from ipywidgets import interact
import ipywidgets as widgets

# European unemployment

We obtained the data from [this eurostat page](http://ec.europa.eu/eurostat/data/database) navigating into the following directories hierarchy:

 - Tables by theme
 - Population and social conditions
 - Labour market
 - Employment and unemployment (Labour force survey)
 - LFS Main indicators
 - Unemployment - LFS adjusted series
 - Total unemployment rate

[Statistical description of the dataset](http://ec.europa.eu/eurostat/cache/metadata/en/tsdec450_esmsip.htm)

In [291]:
euro = pd.read_csv('Homework3-data/european_unemployment.tsv',
                   sep='\t',
                   usecols=['age,unit,sex,geo\\time', '2016 '])
euro.head()

Unnamed: 0,"age,unit,sex,geo\time",2016
0,"TOTAL,PC_ACT,T,AT",6.0
1,"TOTAL,PC_ACT,T,BE",7.8
2,"TOTAL,PC_ACT,T,BG",7.6
3,"TOTAL,PC_ACT,T,CY",13.0
4,"TOTAL,PC_ACT,T,CZ",4.0


In [292]:
countries = pd.DataFrame(euro['age,unit,sex,geo\\time'].str.split(',', expand=True))
countries.head()

Unnamed: 0,0,1,2,3
0,TOTAL,PC_ACT,T,AT
1,TOTAL,PC_ACT,T,BE
2,TOTAL,PC_ACT,T,BG
3,TOTAL,PC_ACT,T,CY
4,TOTAL,PC_ACT,T,CZ


In [293]:
countries[0].unique()

array(['TOTAL'], dtype=object)

In [294]:
countries[1].unique()

array(['PC_ACT', 'PC_POP', 'THS_PER'], dtype=object)

As indicated [here](https://www.google.ch/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0ahUKEwiFm4i2-pzXAhUGZlAKHRd_BmQQFggmMAA&url=http%3A%2F%2Fec.europa.eu%2Feurostat%2Fdocuments%2F10186%2F6246844%2FChanges%2Bto%2Bthe%2Bunemployment%2Bdata%2Btables.pdf&usg=AOvVaw10j2p5RgC8vFvpEuvBX2Jn) we only keep `PC_ACT`

In [295]:
countries = countries[countries[1] == 'PC_ACT'][[3]]
countries.columns = ['country']
countries.head()

Unnamed: 0,country
0,AT
1,BE
2,BG
3,CY
4,CZ


In [296]:
euro = countries.merge(euro, left_index=True, right_index=True)\
                .drop('age,unit,sex,geo\\time', axis=1)
euro.columns = ['country', '2016']

In [297]:
euro.head()

Unnamed: 0,country,2016
0,AT,6.0
1,BE,7.8
2,BG,7.6
3,CY,13.0
4,CZ,4.0


In [298]:
euro['2016'].describe()

count    37.000000
mean      8.286486
std       4.201201
min       3.000000
25%       5.900000
50%       7.800000
75%      10.000000
max      23.600000
Name: 2016, dtype: float64

In [299]:
KIEV = [50.450, 30.5234]

In [300]:
euromap = folium.Map(location=KIEV, zoom_start=4)
euromap

In [301]:
geo_data_bis = json.load(open(r'Homework3-data/topojson/europe.topojson.json'))

In [302]:
map_id, map_name = zip(*(map(lambda obj: (obj['id'], obj['properties']['NAME']),
                        geo_data_bis['objects']['europe']['geometries'])))

In [303]:
map_data = pd.DataFrame({'id': map_id, 'name': map_name})

In [304]:
euro = euro.merge(map_data, left_on='country', right_on='id', how='right').drop('id', axis=1)

In [305]:
euro = euro[['country', 'name', '2016']]

In [306]:
euromap.choropleth(
    geo_data=geo_data_bis,
    data=euro,
    topojson="objects.europe",
    columns=['country', '2016'],
    key_on='feature.id',
    fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Unemployment rate'
)

In [307]:
euromap

# Switzerland Unemployment

In [308]:
chomeurs = pd.read_csv('Homework3-data/chomeursBis.csv', delimiter=' ', encoding = "ISO-8859-1")
chomeurs

Unnamed: 0,Canton,Rate
0,Zurich,3.3
1,Berne,2.4
2,Lucerne,1.7
3,Uri,0.6
4,Schwyz,1.7
5,Obwald,0.7
6,Nidwald,1.0
7,Glaris,1.8
8,Zoug,2.3
9,Fribourg,2.7


In [309]:
# Download Canton codes and french names from wikipedia to match with topojson
page = requests.get('https://fr.wikipedia.org/wiki/Canton_(Suisse)')
soup = BeautifulSoup(page.text, 'html.parser')
table_rows = soup.find('table').findAll('tr')
canton, canton_id = zip(*[(row.find('td').find('a').text, row.find('th').text)
                          for row in table_rows[1:-1]])

In [310]:
cantons_ids = pd.DataFrame({'canton': canton, 'id': canton_id})

In [311]:
chomeurs = chomeurs.merge(cantons_ids, left_on='Canton', right_on='canton')\
                   .drop('canton', axis=1)
chomeurs = chomeurs[['id', 'Canton', 'Rate']]

In [312]:
chomeurs.head()

Unnamed: 0,id,Canton,Rate
0,ZH,Zurich,3.3
1,BE,Berne,2.4
2,LU,Lucerne,1.7
3,UR,Uri,0.6
4,OW,Obwald,0.7


In [313]:
def create_swissmap_with_cantons(line_color="blue", line_width=2):
    swissmap = folium.Map(location=[46.8,8.33], zoom_start=8)
    geo_data_swiss = json.load(open(r'Homework3-data/topojson/ch-cantons.topojson.json'))
    folium.TopoJson(
        geo_data_swiss,
        'objects.cantons',
        name='topojsonBis',
        style_function=lambda feature:{
            'color'  : line_color,
            'weigth' : line_width
        }
    ).add_to(swissmap)
    return swissmap

In [314]:
swissmap = create_swissmap_with_cantons()
swissmap

In [315]:
def create_swissmap_with_choropleth(df, columns, legend_name, fill_color='BuPu', line_color='blue', line_width=2):
    swissmap = create_swissmap_with_cantons(line_color, line_width)
    swissmap.choropleth(
        geo_data=geo_data_swiss,
        data=df,
        topojson="objects.cantons",
        columns=columns,
        key_on='feature.id',
        fill_color=fill_color, fill_opacity=0.7, line_opacity=0.2,
        legend_name=legend_name
    )
    return swissmap

In [316]:
swissmap = create_swissmap_with_choropleth(chomeurs, ['id', 'Rate'], 'Unemployment rate')
swissmap

# Q3

In this section we want to analyze the unemployment rate difference between Swiss and foreign workers. We will reuse the amstat website to extract the data.

In [317]:
UNEMPLOYMENT_SWISS_FOREIGN_AGE_DOC="Homework3-data/chomeurs_et_recherche_par_canton_et_age.csv"

In [318]:
# Extract the unemployment rate of swiss and foreign workers
un_swiss_foreign_raw_df = pd.read_csv("Homework3-data/taux_chomage_nationalite.csv",
                                   encoding = "UTF-16",
                                   header = 0,
                                   skiprows=[0],
                                   usecols=[0,1,3,4],
                                   #index_col=[0,1],
                                   names=["canton", "nationality", "rate", "total"]
                                   )
# translate values in the nationality column
un_swiss_foreign_raw_df["nationality"] = un_swiss_foreign_raw_df["nationality"].map({"Etrangers":"foreign", "Suisses":"swiss"})
display(un_swiss_foreign_raw_df.head(5))
display(un_swiss_foreign_raw_df.tail(5))

Unnamed: 0,canton,nationality,rate,total
0,Zurich,foreign,5.3,12'111
1,Zurich,swiss,2.5,15'114
2,Berne,foreign,5.5,4'900
3,Berne,swiss,1.8,8'758
4,Lucerne,foreign,3.9,1'593


Unnamed: 0,canton,nationality,rate,total
48,Genève,foreign,5.7,5'942
49,Genève,swiss,4.8,6'292
50,Jura,foreign,9.0,505
51,Jura,swiss,3.6,1'114
52,Total,,3.0,133'169


In [319]:
# Extract the unemployement rate by age
un_swiss_age_raw_df = pd.read_csv("Homework3-data/taux_chomage_age.csv",
                                   encoding = "UTF-16",
                                   header = 0,
                                   skiprows=[0],
                                   usecols=[0,1,3,4],
                                   #index_col=[0,1],
                                   names=["canton", "age", "rate", "total"]
                                   )

print("Without age conversion")
display(un_swiss_age_raw_df.head(5))

# Change age convention so it is clearer
age_convention_map_dict = {}
for start in range(15,61,5):
    if start == 60:
        age_convention_map_dict["60 ans et plus"] = "60+"
    else:
        key = "{}-{} ans".format(start, start+4)
        value = "{}-{}".format(start, start+4)
        age_convention_map_dict[key] = value
un_swiss_age_raw_df["age"] = un_swiss_age_raw_df["age"].map(age_convention_map_dict)
print("After age conversion")
un_swiss_age_raw_df.head(5)

Without age conversion


Unnamed: 0,canton,age,rate,total
0,Zurich,15-19 ans,3.9,1'223
1,Zurich,20-24 ans,3.7,2'197
2,Zurich,25-29 ans,3.5,3'131
3,Zurich,30-34 ans,3.7,3'945
4,Zurich,35-39 ans,3.8,3'754


After age conversion


Unnamed: 0,canton,age,rate,total
0,Zurich,15-19,3.9,1'223
1,Zurich,20-24,3.7,2'197
2,Zurich,25-29,3.5,3'131
3,Zurich,30-34,3.7,3'945
4,Zurich,35-39,3.8,3'754


In [320]:
# Lets define a variable that we can easily use to go through the age categories
# age_category_index = un_swiss_age_raw_df.index.get_level_values(1).drop_duplicates().dropna()
age_category_index = un_swiss_age_raw_df["age"].drop_duplicates().dropna()

In [321]:
display(cantons_ids.head(5))

Unnamed: 0,canton,id
0,Zurich,ZH
1,Berne,BE
2,Lucerne,LU
3,Uri,UR
4,Schwytz,SZ


In [322]:
# add canton id to both dataframes and properly organize the columns
un_swiss_foreign_id_df = pd.merge(un_swiss_foreign_raw_df, cantons_ids, on="canton")
un_swiss_foreign_id_df = un_swiss_foreign_id_df[["id", "canton", "nationality", "rate", "total"]]
display(un_swiss_foreign_id_df.head(5))

un_swiss_age_id_df = pd.merge(un_swiss_age_raw_df, cantons_ids, on="canton")
un_swiss_age_id_df = un_swiss_age_id_df[["id", "canton", "age", "rate", "total"]]
display(un_swiss_age_id_df.head(5))

Unnamed: 0,id,canton,nationality,rate,total
0,ZH,Zurich,foreign,5.3,12'111
1,ZH,Zurich,swiss,2.5,15'114
2,BE,Berne,foreign,5.5,4'900
3,BE,Berne,swiss,1.8,8'758
4,LU,Lucerne,foreign,3.9,1'593


Unnamed: 0,id,canton,age,rate,total
0,ZH,Zurich,15-19,3.9,1'223
1,ZH,Zurich,20-24,3.7,2'197
2,ZH,Zurich,25-29,3.5,3'131
3,ZH,Zurich,30-34,3.7,3'945
4,ZH,Zurich,35-39,3.8,3'754


In [329]:
# There's cases where the age is not a proper value, replace it by 0
un_swiss_age_id_df[un_swiss_age_id_df["rate"] == "..."] = 0

In [323]:
q3_swiss_map = create_swissmap_with_choropleth(un_swiss_foreign_id_df[un_swiss_foreign_id_df["nationality"] == "foreign"],
                                              columns=['id', 'rate'],
                                              legend_name='Foreign Unemployment Rate')
q3_swiss_map

In [330]:
display(un_swiss_age_id_df[un_swiss_age_id_df["age"] == "15-19"])
q3_swiss_map = create_swissmap_with_choropleth(un_swiss_age_id_df[un_swiss_age_id_df["age"] == "15-19"],
                                              columns=['id', 'rate'],
                                              legend_name='Foreign Unemployment Rate')
q3_swiss_map

Unnamed: 0,id,canton,age,rate,total
0,ZH,Zurich,15-19,3.9,1'223
10,BE,Berne,15-19,2.2,569
20,LU,Lucerne,15-19,2.1,232
30,UR,Uri,15-19,0.4,5
40,OW,Obwald,15-19,0.4,5
50,NW,Nidwald,15-19,0.5,6
60,GL,Glaris,15-19,1.2,16
70,ZG,Zoug,15-19,1.4,36
80,FR,Fribourg,15-19,2.2,173
90,SO,Soleure,15-19,2.6,186


TypeError: can't multiply sequence by non-int of type 'float'

In [324]:
def get_dataframe_columns_and_legend_from_values(dataset, nationality="foreign", low_age=15):
    """Function to return the values needed by create_swiss_map_with_choroplet"""
    
    if dataset == "global":
        return {"df":chomeurs,
                "columns":['id', 'Rate'],
                "legend_name":'Unemployment rate'}
    elif dataset == "nationality":
        columns = ['id', 'rate']
        if nationality == "foreign":
            return {"df":un_swiss_foreign_id_df[un_swiss_foreign_id_df["nationality"] == "foreign"],
                    "columns":columns,
                    "legend_name":'Foreign Unemployment Rate'
                   }
        elif nationality == "swiss":
             return {"df":un_swiss_foreign_id_df[un_swiss_foreign_id_df["nationality"] == "swiss"],
                    "columns":columns,
                    "legend_name":'Swiss Unemployment Rate'
                    }
        elif nationality == "difference":
             df=un_swiss_foreign_id_df[un_swiss_foreign_id_df["nationality"] == "foreign"].subtract(un_swiss_foreign_id_df[un_swiss_foreign_id_df["nationality"] == "swiss"])
             return {"df":df,
                     "columns":columns,
                     "legend_name":'Difference between Foreign and Swiss unemployment rates'
                     }
        else:
            print("ERROR: {} is not a valid nationality value".format(nationality))
            raise
    elif dataset == "age":
        columns=['id', 'rate']
        if low_age == 60:
            return {"df":un_swiss_age_id_df[un_swiss_age_id_df["age"] == "60+"],
                    "columns":columns,
                    "legend_name":'Unemployment rate in people over 60 years old'
                   }
        elif low_age in range(15,60,5):
            legend_name="Unemployment rate of people from {} to {} years old".format(low_age, low_age+4)
            return {"df": un_swiss_age_id_df[un_swiss_age_id_df["age"] == "{}-{}".format(low_age, low_age+4)],
                    "columns":columns,
                    "legend_name":legend_name
                    }
        else:
            print("ERROR: {} is not a valid low_age value".format(age))
            raise  
    else:
        print("ERROR: {} is not a valid dataset value".format(dataset))
        raise

In [325]:
 
def plot_swiss_unemployment_map(dataset="nationality", nationality="foreign", low_age=15):
    """Will plot a swiss unemployment map depending on parameters
    
        @param dataset: which dataset to use, choose from: ["global", "nationality", "age"]
        @param nationality: which nationality to select from : ["foreign", "swiss", "difference"]
        @param age : starting age of the 5 year group from which the data will be taken select from: range(15,61,5)
    """
    choropleth_info_dict= get_dataframe_columns_and_legend_from_values(dataset, nationality, low_age)
    print(choropleth_info_dict)
    q3_swiss_map = create_swissmap_with_choropleth(choropleth_info_dict["df"],
                                              columns=choropleth_info_dict["columns"],
                                              legend_name=choropleth_info_dict["legend_name"])
    return q3_swiss_map

In [326]:
interact(plot_swiss_unemployment_map,
         dataset=["global", "nationality", "age"],
         nationality=["foreign", "swiss", "difference"],
         low_age=(15,61,5)
        )

A Jupyter Widget

<function __main__.plot_swiss_unemployment_map>