# <p style="text-align: center;">Insee GPS</p>

## Import libraries

In [1]:
import pandas as pd
import numpy as np

import glob
import os

import requests

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

## Set parameters

In [32]:
decree_filename_base = 'arrete_'
decrees_folder_name = './../../data/raw/decrees'
communes_folder_name = './../../data/raw/opendatasoft'
processed_data_folder_name = './../../data/processed'
decrees_filename = 'decrees.parquet'
communes_csv_filename = 'correspondance-code-insee-code-postal.csv'

## Import Decrees

In [21]:


df = pd.read_parquet(os.path.join(processed_data_folder_name, decrees_filename))

In [22]:
df.isna().sum() 

insee               0
nom_commune         0
debut_evenement     0
fin_evenement       0
date_arrete         0
date_parution_jo    0
nom_peril           0
code_peril          0
franchise           0
code_nor            0
decision            0
dtype: int64

In [5]:
df.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision
43661,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43662,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43663,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43664,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,Reconnue
43665,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue


## Get Insee gps coodinates of communes.

In [6]:
df['decision'].value_counts()

decision
Reconnue                                   130517
Reconnue(sans impact sur la modulation)     65206
Non reconnue                                39739
Name: count, dtype: int64

In [7]:
def get_coordinates_from_insee_api(insee_code):
    url = f"https://api-adresse.data.gouv.fr/search/?q={insee_code}&limit=1"
    response = requests.get(url)
    data = response.json()
    if data['features']:
        coordinates = data['features'][0]['geometry']['coordinates']
        return coordinates[1], coordinates[0]  # Retourne (latitude, longitude)
    else:
        return None, None  # Retourne None si le code INSEE est invalide



In [8]:
# Test de la fonction
latitude, longitude = get_coordinates_from_insee_api('75101')
print(f"Latitude: {latitude}, Longitude: {longitude}")

Latitude: 48.867945, Longitude: 2.326594


In [9]:
# Récupérer les codes INSEE uniques
unique_insee_codes = df['insee'].unique()

In [10]:
unique_insee_codes[:10]

array(['5063', '5128', '6031', '6057', '6113', '6157', '9138', '9160',
       '9282', '12018'], dtype=object)

In [15]:
df[df['insee'].isin(['5063', '5128', '6031', '6057', '6113', '6157', '9138', '9160',
       '9282', '12018'])]

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision
43661,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43662,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43663,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
43664,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,Reconnue
43665,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue
...,...,...,...,...,...,...,...,...,...,...,...
280642,6157,VENCE,2017-06-21,2017-12-31,2023-07-21,2023-09-08,Sécheresse,SEC,-,IOME2313528A,Non reconnue
280676,9138,L'HERM,2022-07-01,2022-09-30,2023-07-21,2023-09-08,Sécheresse,SEC,Simple,IOME2313528A,Reconnue
280679,9160,LAVELANET,2022-07-01,2022-09-30,2023-07-21,2023-09-08,Sécheresse,SEC,Simple,IOME2313528A,Reconnue
288294,5063,LA GRAVE,2023-12-10,2023-12-12,2024-02-12,2024-02-23,Inondations et/ou Coulées de Boue,ICB,Simple,IOME2403657A,Reconnue


In [11]:
len(unique_insee_codes)

37496

In [12]:
# Obtenir les coordonnées pour chaque code INSEE unique
coordinates_dict = {code: get_coordinates_from_insee_api(code) for code in unique_insee_codes[:10]}

In [13]:
coordinates_dict

{'5063': (None, None),
 '5128': (None, None),
 '6031': (None, None),
 '6057': (None, None),
 '6113': (None, None),
 '6157': (None, None),
 '9138': (None, None),
 '9160': (None, None),
 '9282': (None, None),
 '12018': (None, None)}

In [9]:
# Ajout des colonnes latitude et longitude
df['latitude'] = df['insee'].apply(lambda x: get_coordinates_from_insee_api(x)[0])
df['longitude'] = df['insee'].apply(lambda x: get_coordinates_from_insee_api(x)[1])

KeyError: 'features'

In [7]:
df.head()

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision
0,1386,SAINT SORLIN EN BUGEY,1982-11-06,1982-11-10,1982-11-30,1982-12-02,Tempête,TMP,Simple,,Reconnue
1,1386,SAINT SORLIN EN BUGEY,1982-11-06,1982-11-10,1982-11-30,1982-12-02,Inondations et/ou Coulées de Boue,ICB,Simple,,Reconnue
2,6073,ISOLA,1982-11-06,1982-11-10,1982-11-30,1982-12-02,Tempête,TMP,Simple,,Reconnue
3,6073,ISOLA,1982-11-06,1982-11-10,1982-11-30,1982-12-02,Inondations et/ou Coulées de Boue,ICB,Simple,,Reconnue
4,6088,NICE,1982-11-06,1982-11-10,1982-11-30,1982-12-02,Tempête,TMP,Simple,,Reconnue


In [18]:
#!pip install geopy

In [19]:
# Installez la bibliothèque geopy si vous ne l'avez pas déjà fait
# pip install geopy

from geopy.geocoders import Nominatim

def get_coordinates_from_insee_code(insee_code):
    # Créez un géocodeur Nominatim
    geolocator = Nominatim(user_agent="my_geocoder")

    # Recherchez la localisation en utilisant le code INSEE
    location = geolocator.geocode(insee_code)

    if location:
        latitude, longitude = location.latitude, location.longitude
        return latitude, longitude
    else:
        return None

# Utilisation de la fonction pour récupérer les coordonnées
insee_code = "5063"
coordinates = get_coordinates_from_insee_code(insee_code)

if coordinates:
    latitude, longitude = coordinates
    print(f"Coordonnées GPS pour le code INSEE {insee_code}:")
    print(f"Latitude: {latitude:.6f}")
    print(f"Longitude: {longitude:.6f}")
else:
    print(f"Impossible de trouver les coordonnées pour le code INSEE {insee_code}.")


Coordonnées GPS pour le code INSEE 5063:
Latitude: 47.461593
Longitude: 7.998588


In [24]:
# Utilisez drop_duplicates() pour obtenir les codes INSEE uniques
unique_codes_df = df[['insee']].drop_duplicates()

# Affichez le nouveau dataframe
print(unique_codes_df)

        insee
43661    5063
43662    5128
43663    6031
43665    6057
43668    6113
...       ...
290396  21550
290398  21574
290400  21619
290464  26134
290971  38208

[37496 rows x 1 columns]


In [26]:
from geopandas.tools import geocode

# Créez un DataFrame avec la colonne 'insee' contenant les codes INSEE des communes
#data = {'insee': ['75001', '13055', '69001']}  # Exemple de codes INSEE
#df = pd.DataFrame(data)

# Utilisez GeoPandas pour géocoder les coordonnées à partir des codes INSEE
gdf = geocode(unique_codes_df['insee'], provider='nominatim', user_agent='mon_geocodeur')

# Ajoutez les colonnes 'latitude' et 'longitude' au DataFrame
unique_codes_df['latitude'] = gdf['geometry'].y
unique_codes_df['longitude'] = gdf['geometry'].x

# Affichez le DataFrame enrichi
print(unique_codes_df)

KeyboardInterrupt: 

In [33]:
 df_communes = pd.read_csv(os.path.join(communes_folder_name, communes_csv_filename), sep=";")

In [36]:
df_communes.shape

(36742, 17)

In [37]:
df_communes.columns

Index(['Code INSEE', 'Code Postal', 'Commune', 'Département', 'Région',
       'Statut', 'Altitude Moyenne', 'Superficie', 'Population',
       'geo_point_2d', 'geo_shape', 'ID Geofla', 'Code Commune', 'Code Canton',
       'Code Arrondissement', 'Code Département', 'Code Région'],
      dtype='object')

In [39]:
df_communes['Code INSEE'].is_unique

True

In [42]:
# Merge DataFrames based on 'insee'
merged_df = pd.merge(df, df_communes[['Code INSEE', 'geo_point_2d']],  left_on='insee', right_on='Code INSEE', how='left')

In [43]:
merged_df

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision,Code INSEE,geo_point_2d
0,5063,LA GRAVE,2001-03-19,2001-03-25,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue,,
1,5128,SAINT ANDRE D EMBRUN,2001-03-29,2001-03-29,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue,,
2,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue,,
3,6031,CANTARON,2000-11-23,2000-11-24,2001-11-15,2001-12-01,Inondations et/ou Coulées de Boue,ICB,Simple,INTE0100649A,Reconnue,,
4,6057,L'ESCARENE,2000-10-30,2000-10-31,2001-11-15,2001-12-01,Mouvement de Terrain,MVT,Simple,INTE0100649A,Reconnue,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
235457,90035,DORANS,2023-06-01,2023-09-30,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,Non reconnue,90035,"47.58474462701379, 6.837247587105974"
235458,90039,ESSERT,2023-01-01,2023-12-31,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,Non reconnue,90039,"47.63670149621771, 6.811982362490323"
235459,90045,FECHE L EGLISE,2023-07-01,2023-09-30,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,Non reconnue,90045,"47.502176741594155, 6.955880312380269"
235460,90068,MEROUX MOVAL,2023-01-01,2023-10-31,2024-06-18,2024-07-02,Sécheresse,SEC,-,IOME2415881A,Non reconnue,90068,"47.59254610319736, 6.9095726149755095"


In [45]:
merged_df.isna().sum() 

insee                   0
nom_commune             0
debut_evenement         0
fin_evenement           0
date_arrete             0
date_parution_jo        0
nom_peril               0
code_peril              0
franchise               0
code_nor                0
decision                0
Code INSEE          13818
geo_point_2d        13818
dtype: int64

In [46]:
from geopy.geocoders import Nominatim

# Créez un objet géocodeur
geolocator = Nominatim(user_agent="my_geocoder")

# Recherchez les coordonnées pour le code INSEE 5063
location = geolocator.geocode("5063, France")

if location:
    print(f"Latitude: {location.latitude}, Longitude: {location.longitude}")
else:
    print("Code INSEE 5063 introuvable.")


Latitude: 46.2337295, Longitude: 5.3538775


In [48]:
n_distinct_values = merged_df[merged_df['geo_point_2d'].isnull()]['insee'].nunique()
n_distinct_values 


3100

In [49]:
distinct_values = merged_df[merged_df['geo_point_2d'].isnull()]['insee'].drop_duplicates()
distinct_values 

0         5063
1         5128
2         6031
4         6057
7         6113
          ... 
231036    5108
231039    5152
231042    5183
231203    4137
232161    1105
Name: insee, Length: 3100, dtype: object

In [51]:
df[df['insee']=='5152']

Unnamed: 0,insee,nom_commune,debut_evenement,fin_evenement,date_arrete,date_parution_jo,nom_peril,code_peril,franchise,code_nor,decision
287380,5152,SAINT MAURICE EN VALGODEMARD,2023-10-19,2023-10-20,2024-01-18,2024-01-30,Inondations et/ou Coulées de Boue,ICB,Simple,IOME2400969A,Reconnue


In [52]:
distinct_col1b = df['insee'].unique()

In [54]:
len(distinct_col1)

37496

In [55]:
len(df_communes['Code INSEE'])

36742

In [56]:

df1['is_in_df2'] = distinct_col1.isin(df_communes['Code INSEE'])

AttributeError: 'numpy.ndarray' object has no attribute 'isin'

In [None]:
# Ensure that the columns are of the same type
df1['col1'] = df1['col1'].astype(str)
df2['col2'] = df2['col2'].astype(str)

# Find distinct values of col1 of df1 that are not in col2 of df2
distinct_values = df1[~df1['col1'].isin(df2['col2'])]['col1'].nunique()

print(f'There are {distinct_values} distinct values of col1 in df1 that are not in col2 of df2.')


In [57]:
distinct_values = df[~df['insee'].isin(df_communes['Code INSEE'])]['insee'].nunique()
distinct_values

3100