In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [3]:
def load_data():
    """Load data from the CSV files referundum/regions/departments."""
    referendum = pd.read_csv('data/referendum.csv',
                             sep=';')
    regions = pd.read_csv('data/regions.csv')
    departments = pd.read_csv('data/departments.csv')

    return referendum, regions, departments

In [4]:
referendum, regions, departments = load_data()

regions.head()

Unnamed: 0,id,code,name,slug
0,1,1,Guadeloupe,guadeloupe
1,2,2,Martinique,martinique
2,3,3,Guyane,guyane
3,4,4,La Réunion,la reunion
4,5,6,Mayotte,mayotte


In [5]:

departments.head()

Unnamed: 0,id,region_code,code,name,slug
0,1,84,1,Ain,ain
1,2,32,2,Aisne,aisne
2,3,84,3,Allier,allier
3,4,93,4,Alpes-de-Haute-Provence,alpes de haute provence
4,5,93,5,Hautes-Alpes,hautes alpes


In [6]:
def merge_regions_and_departments(regions, departments):
    """Merge regions and departments in one DataFrame.

    The columns in the final DataFrame should be:
    ['code_reg', 'name_reg', 'code_dep', 'name_dep']
    """
    df_merge = pd.merge(departments, regions, how='inner', 
                        left_on='region_code', right_on='code', 
                        suffixes= ('_dep', '_reg'))

    return df_merge.drop(['id_reg', 'slug_reg', 'id_dep', 'region_code', 'slug_dep'], axis=1)

In [7]:
regions_and_departments = merge_regions_and_departments(regions, departments)

In [8]:
referendum.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36791 entries, 0 to 36790
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Department code  36791 non-null  object
 1   Department name  36791 non-null  object
 2   Town code        36791 non-null  int64 
 3   Town name        36791 non-null  object
 4   Registered       36791 non-null  int64 
 5   Abstentions      36791 non-null  int64 
 6   Null             36791 non-null  int64 
 7   Choice A         36791 non-null  int64 
 8   Choice B         36791 non-null  int64 
dtypes: int64(6), object(3)
memory usage: 2.5+ MB


In [25]:
mask = referendum['Department code'].str.contains('Z')
mask

0        False
1        False
2        False
3        False
4        False
         ...  
36786     True
36787     True
36788     True
36789     True
36790     True
Name: Department code, Length: 36791, dtype: bool

In [32]:
def merge_referendum_and_areas(referendum, regions_and_departments):
    """Merge referendum and regions_and_departments in one DataFrame.

    You can drop the lines relative to DOM-TOM-COM departments, and the
    french living abroad.
    """
    mask = ~referendum['Department code'].str.contains('Z')
    referendum['Department code'] = referendum['Department code'].str.zfill(2)
    return pd.merge(referendum.loc[mask,:], regions_and_departments, how='inner', 
                    left_on='Department code', right_on='code_dep')

In [34]:
referendum_and_areas = merge_referendum_and_areas(referendum, regions_and_departments)

In [35]:
referendum_and_areas.columns

Index(['Department code', 'Department name', 'Town code', 'Town name',
       'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B', 'code_dep',
       'name_dep', 'code_reg', 'name_reg'],
      dtype='object')

In [36]:
def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute count for each region.

    The return DataFrame should be indexed by `code_reg` and have columns:
    ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']
    """

    return referendum_and_areas.loc[:, ['name_reg', 'Registered', 'Abstentions',
                                        'Null', 'Choice A', 'Choice B']].groupby('code_reg').agg(np.sum)

In [37]:
compute_referendum_result_by_regions(referendum_and_areas)

KeyError: 'code_reg'