In [3]:
"""Plotting referendum results in pandas.

In short, we want to make beautiful map to report results of a referendum. In
some way, we would like to depict results with something similar to the maps
that you can find here:
https://github.com/x-datascience-datacamp/datacamp-assignment-pandas/blob/main/example_map.png

To do that, you will load the data as pandas.DataFrame, merge the info and
aggregate them by regions and finally plot them on a map using `geopandas`.
"""
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt


In [4]:
def load_data():
    """Load data from the CSV files referundum/regions/departments."""
    referendum = pd.read_csv('data/referendum.csv', sep=';')
    regions = pd.read_csv('data/regions.csv')
    departments = pd.read_csv('data/departments.csv')

    return referendum, regions, departments


In [5]:
referendum = pd.read_csv('data/referendum.csv', sep=';')
regions = pd.read_csv('data/regions.csv')
departments = pd.read_csv('data/departments.csv')


In [7]:
def merge_regions_and_departments(regions, departments):
    """Merge regions and departments in one DataFrame.

    The columns in the final DataFrame should be:
    ['code_reg', 'name_reg', 'code_dep', 'name_dep']
    """
    
   # Merge regions and departments on the corresponding columns
    merged_df = pd.merge(regions, departments, how='inner', left_on='code', right_on='region_code')

    # Select the required columns
    result_df = merged_df[['code_x', 'name_x', 'code_y', 'name_y']]

    # Rename the columns for consistency
    result_df = result_df.rename(columns={'code_x': 'code_reg', 'name_x': 'name_reg', 'code_y':'code_dep','name_y': 'name_dep'})

    return result_df


In [10]:
regions_and_departments=merge_regions_and_departments(regions, departments)

def merge_referendum_and_areas(referendum, regions_and_departments):
    """Merge referendum and regions_and_departments in one DataFrame.

    You can drop the lines relative to DOM-TOM-COM departments, and the
    french living abroad, which all have a code that contains `Z`.

    DOM-TOM-COM departments are departements that are remote from metropolitan
    France, like Guadaloupe, Reunion, or Tahiti.
    """

    referendum = referendum.loc[~referendum['Department code'].str.startswith('Z')]
    merged_df = pd.merge(regions_and_departments, referendum, how='inner', left_on='code_dep', right_on='Department code')
    result_df = merged_df.rename(columns={'code_reg': 'Region code', 'name_reg':'Region name'})
    result_df = result_df.drop({'code_dep','name_dep'}, axis=1)
    return result_df 



In [17]:
referendum_and_areas=merge_referendum_and_areas(referendum,regions_and_departments)
print(referendum_and_areas)

      Region code    Region name Department code Department name  Town code  \
0              11  Île-de-France              75           PARIS         56   
1              11  Île-de-France              77  SEINE ET MARNE          1   
2              11  Île-de-France              77  SEINE ET MARNE          2   
3              11  Île-de-France              77  SEINE ET MARNE          3   
4              11  Île-de-France              77  SEINE ET MARNE          4   
...           ...            ...             ...             ...        ...   
33331          94          Corse              2B     HAUTE CORSE        356   
33332          94          Corse              2B     HAUTE CORSE        361   
33333          94          Corse              2B     HAUTE CORSE        364   
33334          94          Corse              2B     HAUTE CORSE        365   
33335          94          Corse              2B     HAUTE CORSE        366   

                    Town name  Registered  Abstenti

In [24]:
referendum_and_areas=merge_referendum_and_areas(referendum,regions_and_departments)

def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute count for each region.

    The return DataFrame should be indexed by `code_reg` and have columns:
    ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']
    """
    
    referendum_and_areas = referendum_and_areas.rename(columns={'Region code': 'code_reg', 'Region name':'name_reg'})
    referendum_and_areas = referendum_and_areas.drop({'Department code', 'Department name','Town code', 'Town name'}, axis=1)
    result_df = referendum_and_areas.groupby(['code_reg', 'name_reg']).sum().reset_index()
    return result_df

In [33]:
referendum_result_by_regions=compute_referendum_result_by_regions(referendum_and_areas)

def plot_referendum_map(referendum_result_by_regions):
    """Plot a map with the results from the referendum.

    * Load the geographic data with geopandas from `regions.geojson`.
    * Merge these info into `referendum_result_by_regions`.
    * Use the method `GeoDataFrame.plot` to display the result map. The results
      should display the rate of 'Choice A' over all expressed ballots.
    * Return a gpd.GeoDataFrame with a column 'ratio' containing the results.
    """

    # Load the geographic data from 'regions.geojson'
    geo_data = gpd.read_file('data/regions.geojson')

    # Merge geographic data with referendum results
    merged_data = geo_data.merge(referendum_result_by_regions, left_on='code_reg', right_index=True)

    # Calculate the ratio of 'Choice A' over all expressed ballots
    merged_data['ratio'] = merged_data['Choice A'] / (merged_data['Choice A'] + merged_data['Choice B'])

    # Plot the map
    merged_data.plot(column='ratio', cmap='viridis', legend=True, figsize=(12, 8))
    plt.title('Referendum Results: Ratio of "Choice A" over all expressed ballots')
    plt.show()

    return merged_data


In [36]:
if __name__ == "__main__":

    referendum, df_reg, df_dep = load_data()
    regions_and_departments = merge_regions_and_departments(
        df_reg, df_dep
    )
    referendum_and_areas = merge_referendum_and_areas(
        referendum, regions_and_departments
    )
    referendum_results = compute_referendum_result_by_regions(
        referendum_and_areas
    )
    print(referendum_results)

    plot_referendum_map(referendum_results)
    plt.show()
    

   code_reg                    name_reg  Registered  Abstentions   Null  \
0        11               Île-de-France     6942361      1515884  85997   
1        24         Centre-Val de Loire     1801366       326560  28827   
2        27     Bourgogne-Franche-Comté     1997925       358321  33671   
3        28                   Normandie     2372779       426075  35513   
4        32             Hauts-de-France     3834376       793627  55405   
5        44                   Grand Est     3660510       742840  54471   
6        52            Pays de la Loire     2605438       412509  47677   
7        53                    Bretagne     2380594       364724  36077   
8        75          Nouvelle-Aquitaine     4201774       711017  71629   
9        76                   Occitanie     3892146       642412  60912   
10       84        Auvergne-Rhône-Alpes     4334439       759462  66232   
11       93  Provence-Alpes-Côte d'Azur     2459998       474066  31690   
12       94              

KeyError: 'code_reg'