In [7]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt


In [None]:

def load_data():
    """Load data from the CSV files referundum/regions/departments."""
    referendum = pd.read_csv("data/referendum.csv",sep=";")
    regions = pd.read_csv("data/regions.csv")
    departments = pd.read_csv("data/departments.csv")

    return referendum, regions, departments

def merge_regions_and_departments(regions, departments):
    """Merge regions and departments in one DataFrame.

    The columns in the final DataFrame should be:`
    ['code_reg', 'name_reg', 'code_dep', 'name_dep']
    """
    region_temp = regions.drop(columns = ["id","slug"]).rename(columns={"code":"code_reg","name":"name_reg"})
    dept_temp = departments.drop(columns = ["id","slug"]).rename(columns={"code":"code_dep","name":"name_dep","region_code":"code_reg"})
    df = pd.merge(region_temp, dept_temp, on="code_reg",how='left')

    return df

def merge_referendum_and_areas(referendum, regions_and_departments):
    """Merge referendum and regions_and_departments in one DataFrame.

    You can drop the lines relative to DOM-TOM-COM departments, and the
    french living abroad.
    """

    regions_and_departments = regions_and_departments[~ (regions_and_departments['code_reg']=='COM')]
    referendum.loc[referendum['Department code'].str.len() == 1, 'Department code'] = referendum['Department code'].str.zfill(2)
    referendum = referendum[~referendum['Department code'].str.match('^[Z]')]
    # quit() 
    
    df = pd.merge(regions_and_departments, referendum, left_on="code_dep", right_on="Department code",how='right')
    
    return df

def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute count for each region.

    The return DataFrame should be indexed by `code_reg` and have columns:
    ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']
    """
    referendum_and_areas = referendum_and_areas.set_index('code_reg')    
    referendum_and_areas[['Registered', 'Abstentions', 'Choice A', 'Choice B', 'Null']] = referendum_and_areas[['Registered', 'Abstentions', 'Choice A', 'Choice B', 'Null']].astype(int)
    referendum_result = referendum_and_areas[['name_reg', 'Registered', 'Abstentions', 'Choice A', 'Choice B', 'Null']].groupby(['code_reg', 'name_reg']).sum()
    referendum_result = referendum_result.reset_index().set_index('code_reg')
    return referendum_result

def plot_referendum_map(referendum_result_by_regions):
    """Plot a map with the results from the referendum.

    * Load the geographic data with geopandas from `regions.geojson`.
    * Merge these info into `=`.
    * Use the method `GeoDataFrame.plot` to display the result map. The results
      should display the rate of 'Choice A' over all expressed ballots.
    * Return a gpd.GeoDataFrame with a column 'ratio' containing the results.
    """
    gdf = gpd.read_file("data/regions.geojson")
    
    gdf = gdf.rename(columns={"code": "code_reg"})
    
    referendum_result_by_regions = referendum_result_by_regions.reset_index()
    referendum_result_by_regions['expressed_ballots'] = referendum_result_by_regions['Registered'] - referendum_result_by_regions['Abstentions']
    referendum_result_by_regions['ratio'] = referendum_result_by_regions['Choice A'] / referendum_result_by_regions['expressed_ballots']

    merged_gdf = gdf.merge(referendum_result_by_regions[['code_reg', 'name_reg', 'ratio']], on="code_reg", how="left")
    ax = merged_gdf.plot(column='ratio', legend=True, figsize=(10, 10),
                         legend_kwds={'label': "Rate of 'Choice A' over expressed ballots"})
    ax.set_title("Referendum Results: Choice A Rate by Region")
    return merged_gdf

In [32]:
referendum, df_reg, df_dep = load_data()
regions_and_departments = merge_regions_and_departments(
    df_reg, df_dep
)
referendum_and_areas = merge_referendum_and_areas(
    referendum, regions_and_departments
)
referendum_results = compute_referendum_result_by_regions(
    referendum_and_areas
)
referendum_results = compute_referendum_result_by_regions(
    referendum_and_areas
)
# printreferendum_results)

df = plot_referendum_map(referendum_results)
df
# plt.show()


Unnamed: 0,code_reg,nom,geometry,Choice A_ratio
0,11,Île-de-France,"POLYGON ((1.92215 48.4576, 1.92074 48.44775, 1...",0.467452
1,24,Centre-Val de Loire,"POLYGON ((0.81482 48.67016, 0.82767 48.68072, ...",0.400656
2,27,Bourgogne-Franche-Comté,"POLYGON ((6.94054 47.43337, 6.94168 47.4158, 6...",0.407565
3,28,Normandie,"POLYGON ((-1.11962 49.35557, -1.11503 49.3624,...",0.419669
4,32,Hauts-de-France,"POLYGON ((4.14089 49.97876, 4.15398 49.97484, ...",0.422041
5,44,Grand Est,"POLYGON ((3.41479 48.39027, 3.42208 48.41334, ...",0.354189
6,52,Pays de la Loire,"MULTIPOLYGON (((-2.30479 46.70942, -2.28612 46...",0.420656
7,53,Bretagne,"MULTIPOLYGON (((-2.12371 48.60441, -2.13448 48...",0.46831
8,75,Nouvelle-Aquitaine,"MULTIPOLYGON (((-1.02574 45.57469, -0.993 45.5...",0.47017
14,76,Occitanie,"MULTIPOLYGON (((1.78613 42.57362, 1.78094 42.5...",0.456739


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fde883a3c70>

In [6]:
departments.head()

Unnamed: 0,id,region_code,code,name,slug
0,1,84,1,Ain,ain
1,2,32,2,Aisne,aisne
2,3,84,3,Allier,allier
3,4,93,4,Alpes-de-Haute-Provence,alpes de haute provence
4,5,93,5,Hautes-Alpes,hautes alpes
