# <p style="text-align: center;">Get Locations</p>

## Import libraries

In [1]:
import pandas as pd
import numpy as np

import glob
import os

import requests

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

## Set parameters

In [2]:
decree_filename_base = 'arrete_'
decrees_folder_name = './../../data/raw/decrees'
communes_folder_name = './../../data/raw/opendatasoft'
processed_data_folder_name = './../../data/processed'
decrees_filename = 'decrees.parquet'
decrees_locations_filename = 'decrees_locations.parquet'
communes_csv_filename = 'correspondance-code-insee-code-postal.csv'

In [3]:
(os.path.join(communes_folder_name, communes_csv_filename))

'./../../data/raw/opendatasoft/correspondance-code-insee-code-postal.csv'

## Import Decrees

In [4]:


df = pd.read_parquet(os.path.join(processed_data_folder_name, decrees_filename))

In [5]:
df.isna().sum() 

insee               0
nom_commune         0
debut_evenement     0
fin_evenement       0
date_arrete         0
date_parution_jo    0
nom_peril           0
code_peril          0
franchise           0
code_nor            0
decision            0
dtype: int64

In [6]:
df.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision
43661,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43662,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43663,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43664,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,Reconnue
43665,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue


In [7]:
df['decision'].value_counts()

decision
Reconnue                                   130517
Reconnue(sans impact sur la modulation)     65206
Non reconnue                                39739
Name: count, dtype: int64

## Get Insee gps coodinates of communes.

In [8]:
communes_location = pd.read_csv(os.path.join(communes_folder_name, communes_csv_filename), sep=";")

In [9]:
communes_location.columns

Index(['Code INSEE', 'Code Postal', 'Commune', 'Département', 'Région',
       'Statut', 'Altitude Moyenne', 'Superficie', 'Population',
       'geo_point_2d', 'geo_shape', 'ID Geofla', 'Code Commune', 'Code Canton',
       'Code Arrondissement', 'Code Département', 'Code Région'],
      dtype='object')

In [10]:
communes_location.head()

Unnamed: 0,Code INSEE,Code Postal,Commune,Département,Région,Statut,Altitude Moyenne,Superficie,Population,geo_point_2d,geo_shape,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Département,Code Région
0,59416,59190,MORBECQUE,['NORD'],['NORD-PAS-DE-CALAIS'],['Commune simple'],23.0,4455.0,2.7,"50.67689342861573, 2.536216144331492","{""coordinates"": [[[2.501239302134784, 50.63986...",1237,416,30,4,59,31
1,22102,22330,LANGOURLA,"[""COTES-D'ARMOR""]",['BRETAGNE'],['Commune simple'],170.0,2165.0,0.6,"48.284641107667674, -2.415501011324659","{""coordinates"": [[[-2.416298136623701, 48.2530...",10183,102,9,1,22,53
2,31225,31310,GOUTEVERNISSE,['HAUTE-GARONNE'],['MIDI-PYRENEES'],['Commune simple'],264.0,485.0,0.2,"43.214026301449536, 1.173765920286677","{""coordinates"": [[[1.165580460427912, 43.20037...",20555,225,27,1,31,73
3,23025,23220,BONNAT,['CREUSE'],['LIMOUSIN'],['Chef-lieu canton'],355.0,4554.0,1.3,"46.32301933418213, 1.913450631364578","{""coordinates"": [[[1.952331841746855, 46.28624...",3586,25,6,2,23,74
4,38522,38740,VALJOUFFREY,['ISERE'],['RHONE-ALPES'],['Commune simple'],2009.0,12644.0,0.1,"44.88153491127053, 6.07950399615059","{""coordinates"": [[[6.056489736635054, 44.81581...",1082,522,36,1,38,82


In [11]:
communes_location.isna().sum() 

Code INSEE             0
Code Postal            0
Commune                0
Département            0
Région                 0
Statut                 0
Altitude Moyenne       0
Superficie             0
Population             0
geo_point_2d           0
geo_shape              0
ID Geofla              0
Code Commune           0
Code Canton            0
Code Arrondissement    0
Code Département       0
Code Région            0
dtype: int64

In [12]:
communes_location['Code INSEE'].is_unique

True

In [13]:
# Merge DataFrames based on 'insee'

merged_df = pd.merge(df, communes_location[['Code INSEE', 'Code Postal', 'Commune', 'Département', 'Région',
       'Statut', 'Altitude Moyenne', 'Superficie', 'Population',
       'geo_point_2d', 'geo_shape', 'ID Geofla', 'Code Commune', 'Code Canton',
       'Code Arrondissement', 'Code Département', 'Code Région']],  left_on='insee', right_on='Code INSEE', how='left')

In [14]:
df.shape

(235462, 11)

In [15]:
merged_df.shape

(235462, 28)

In [16]:
merged_df.isna().sum() 

insee                      0
nom_commune                0
debut_evenement            0
fin_evenement              0
date_arrete                0
date_parution_jo           0
nom_peril                  0
code_peril                 0
franchise                  0
code_nor                   0
decision                   0
Code INSEE             13818
Code Postal            13818
Commune                13818
Département            13818
Région                 13818
Statut                 13818
Altitude Moyenne       13818
Superficie             13818
Population             13818
geo_point_2d           13818
geo_shape              13818
ID Geofla              13818
Code Commune           13818
Code Canton            13818
Code Arrondissement    13818
Code Département       13818
Code Région            13818
dtype: int64

In [17]:
merged_df

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,...,Superficie,Population,geo_point_2d,geo_shape,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Département,Code Région
0,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
1,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
2,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
3,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,...,,,,,,,,,,
4,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235457,90035,DORANS,2023-06-01,2023-09-30,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,...,382.0,0.6,"47.58474462701379, 6.837247587105974","{""coordinates"": [[[6.821011292623255, 47.57432...",20508.0,35.0,10.0,1.0,90,43.0
235458,90039,ESSERT,2023-01-01,2023-12-31,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,...,710.0,3.2,"47.63670149621771, 6.811982362490323","{""coordinates"": [[[6.807038927428798, 47.62266...",33825.0,39.0,6.0,1.0,90,43.0
235459,90045,FECHE L EGLISE,2023-07-01,2023-09-30,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,...,399.0,0.8,"47.502176741594155, 6.955880312380269","{""coordinates"": [[[6.952446414801375, 47.48493...",25959.0,45.0,12.0,1.0,90,43.0
235460,90068,MEROUX MOVAL,2023-01-01,2023-10-31,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,...,890.0,0.8,"47.59254610319736, 6.9095726149755095","{""coordinates"": [[[6.921865974123075, 47.57813...",14506.0,68.0,11.0,1.0,90,43.0


In [18]:
unique_values = merged_df[merged_df['Code INSEE'].isnull()]['nom_commune'].unique()

print(unique_values)

['LA GRAVE' 'SAINT ANDRE D EMBRUN' 'CANTARON' ... 'VILLAR SAINT PANCRACE'
 'NIBLES' 'CIVRIEUX']


In [19]:
unique_values.shape

(3061,)

In [20]:
merged_df_no_locations = merged_df[merged_df['Code INSEE'].isnull()]

In [21]:
merged_df_no_locations['debut_evenement'].max()

Timestamp('2024-05-24 00:00:00')

In [22]:
merged_df_no_locations['debut_evenement'].min()

Timestamp('1985-01-01 00:00:00')

In [23]:
merged_df_no_locations.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,...,Superficie,Population,geo_point_2d,geo_shape,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Département,Code Région
0,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
1,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
2,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,
3,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,...,,,,,,,,,,
4,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,,,,,,,,,,


In [24]:
merged_df_locations = merged_df[merged_df['Code INSEE'].notnull()]

In [25]:
merged_df_locations.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,...,Superficie,Population,geo_point_2d,geo_shape,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Département,Code Région
12,12018,BALAGUIER D OLT,1990-01-01,1990-09-30,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1098.0,0.1,"44.5189126443148, 1.9771926377212372","{""coordinates"": [[[1.989175719192402, 44.49934...",15146.0,18.0,6.0,3.0,12,73.0
13,12018,BALAGUIER D OLT,1992-03-01,1992-06-30,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1098.0,0.1,"44.5189126443148, 1.9771926377212372","{""coordinates"": [[[1.989175719192402, 44.49934...",15146.0,18.0,6.0,3.0,12,73.0
14,14555,SAINT ANDRE D HEBERTOT,2000-11-06,2000-11-11,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,988.0,0.5,"49.30322090380799, 0.291910061831684","{""coordinates"": [[[0.30899812835816304, 49.283...",5555.0,555.0,5.0,3.0,14,25.0
15,14755,VILLERVILLE,2001-03-21,2001-03-23,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,342.0,0.8,"49.39087631534191, 0.123148235678736","{""coordinates"": [[[0.127262288895329, 49.37693...",14618.0,755.0,35.0,3.0,14,25.0
16,17118,CORIGNAC,1989-05-01,1989-12-31,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1112.0,0.3,"45.244860498269, -0.39280322020985503","{""coordinates"": [[[-0.41318672381903404, 45.22...",27409.0,118.0,17.0,1.0,17,54.0


In [26]:
merged_df_locations.isnull().sum()

insee                  0
nom_commune            0
debut_evenement        0
fin_evenement          0
date_arrete            0
date_parution_jo       0
nom_peril              0
code_peril             0
franchise              0
code_nor               0
decision               0
Code INSEE             0
Code Postal            0
Commune                0
Département            0
Région                 0
Statut                 0
Altitude Moyenne       0
Superficie             0
Population             0
geo_point_2d           0
geo_shape              0
ID Geofla              0
Code Commune           0
Code Canton            0
Code Arrondissement    0
Code Département       0
Code Région            0
dtype: int64

In [27]:
merged_df_locations.shape

(221644, 28)

In [28]:
merged_df_locations['debut_evenement'].max()

Timestamp('2024-05-24 00:00:00')

In [29]:
merged_df_locations['debut_evenement'].min()

Timestamp('1985-05-12 00:00:00')

In [30]:
len(merged_df_locations['Code INSEE'].unique())

34396

In [31]:
# save to parquet
merged_df_locations.to_parquet(os.path.join(processed_data_folder_name, decrees_locations_filename))

## Test reloading the dataframe from parquet

In [32]:
# test reloading the dataframe from parquet
import pandas as pd

df_reloaded = pd.read_parquet(os.path.join(processed_data_folder_name, decrees_locations_filename))




In [33]:
df_reloaded.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,...,Superficie,Population,geo_point_2d,geo_shape,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Département,Code Région
12,12018,BALAGUIER D OLT,1990-01-01,1990-09-30,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1098.0,0.1,"44.5189126443148, 1.9771926377212372","{""coordinates"": [[[1.989175719192402, 44.49934...",15146.0,18.0,6.0,3.0,12,73.0
13,12018,BALAGUIER D OLT,1992-03-01,1992-06-30,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1098.0,0.1,"44.5189126443148, 1.9771926377212372","{""coordinates"": [[[1.989175719192402, 44.49934...",15146.0,18.0,6.0,3.0,12,73.0
14,14555,SAINT ANDRE D HEBERTOT,2000-11-06,2000-11-11,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,988.0,0.5,"49.30322090380799, 0.291910061831684","{""coordinates"": [[[0.30899812835816304, 49.283...",5555.0,555.0,5.0,3.0,14,25.0
15,14755,VILLERVILLE,2001-03-21,2001-03-23,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,...,342.0,0.8,"49.39087631534191, 0.123148235678736","{""coordinates"": [[[0.127262288895329, 49.37693...",14618.0,755.0,35.0,3.0,14,25.0
16,17118,CORIGNAC,1989-05-01,1989-12-31,2001-11-15,2001-12-01,Sécheresse,SEC,Simple,INTE0100649A,...,1112.0,0.3,"45.244860498269, -0.39280322020985503","{""coordinates"": [[[-0.41318672381903404, 45.22...",27409.0,118.0,17.0,1.0,17,54.0


In [35]:
df_reloaded.equals(merged_df_locations)

True

In [36]:
df_reloaded.describe()

Unnamed: 0,debut_evenement,fin_evenement,date_arrete,date_parution_jo,Altitude Moyenne,Superficie,Population,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Région
count,221644,221644,221644,221644,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0
mean,2005-11-10 03:19:10.926711168,2006-02-16 10:27:57.890671488,2006-08-06 20:27:42.428037888,2006-08-18 15:24:08.744834048,210.044459,1818.752,3.692151,17565.90487,239.77176,22.711853,2.292469,52.248579
min,1985-05-12 00:00:00,1985-05-15 00:00:00,1987-06-24 00:00:00,1987-07-10 00:00:00,0.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0
25%,1999-12-25 00:00:00,1999-12-29 00:00:00,1999-12-29 00:00:00,1999-12-30 00:00:00,80.0,727.0,0.3,8120.0,100.0,10.0,1.0,26.0
50%,2003-07-01 00:00:00,2003-09-22 00:00:00,2003-12-12 00:00:00,2003-12-13 00:00:00,143.0,1234.0,0.7,17138.0,207.0,19.0,2.0,53.0
75%,2016-01-01 00:00:00,2016-03-31 00:00:00,2016-09-16 00:00:00,2016-10-20 00:00:00,252.0,2152.0,2.1,27022.25,343.0,30.0,3.0,73.0
max,2024-05-24 00:00:00,2024-05-26 00:00:00,2024-06-18 00:00:00,2024-07-02 00:00:00,2713.0,1871833.0,440.2,36613.0,909.0,99.0,9.0,94.0
std,,,,,232.417908,6292.843,14.831298,10767.252219,174.684157,18.132225,1.204191,25.544537


In [37]:
merged_df_locations.describe()

Unnamed: 0,debut_evenement,fin_evenement,date_arrete,date_parution_jo,Altitude Moyenne,Superficie,Population,ID Geofla,Code Commune,Code Canton,Code Arrondissement,Code Région
count,221644,221644,221644,221644,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0,221644.0
mean,2005-11-10 03:19:10.926711168,2006-02-16 10:27:57.890671488,2006-08-06 20:27:42.428037888,2006-08-18 15:24:08.744834048,210.044459,1818.752,3.692151,17565.90487,239.77176,22.711853,2.292469,52.248579
min,1985-05-12 00:00:00,1985-05-15 00:00:00,1987-06-24 00:00:00,1987-07-10 00:00:00,0.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0
25%,1999-12-25 00:00:00,1999-12-29 00:00:00,1999-12-29 00:00:00,1999-12-30 00:00:00,80.0,727.0,0.3,8120.0,100.0,10.0,1.0,26.0
50%,2003-07-01 00:00:00,2003-09-22 00:00:00,2003-12-12 00:00:00,2003-12-13 00:00:00,143.0,1234.0,0.7,17138.0,207.0,19.0,2.0,53.0
75%,2016-01-01 00:00:00,2016-03-31 00:00:00,2016-09-16 00:00:00,2016-10-20 00:00:00,252.0,2152.0,2.1,27022.25,343.0,30.0,3.0,73.0
max,2024-05-24 00:00:00,2024-05-26 00:00:00,2024-06-18 00:00:00,2024-07-02 00:00:00,2713.0,1871833.0,440.2,36613.0,909.0,99.0,9.0,94.0
std,,,,,232.417908,6292.843,14.831298,10767.252219,174.684157,18.132225,1.204191,25.544537
