# Geocoding the BAAC database

In [1]:
# Import packages
# Operational
import os
from os.path import isfile, join
import zipfile

# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Scale for maps
from matplotlib_scalebar.scalebar import ScaleBar
# Personalised legends for maps
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

# Geospatial
import geopandas as gpd
import rasterio as rio
import fiona
import contextily
import osmnx
from shapely import geometry

In [2]:
# Working directory
os.chdir("/Users/unaioyon/Desktop/masters_thesis/data/fra")

In [63]:
# Import the data
# 2005 - 2016
acc1 = pd.read_csv("accidents/geolocated_tristramgrabener/paris/accidents_paris.csv",
                   sep = ";",
                  low_memory = False)

In [19]:
acc1.iloc[:,21:29].head()
print(acc1["longitude"].isna().sum())

12220


In [None]:
# Import the data
# 2017 - 2020
for i in np.arange(2017, 2021):
    

In [58]:
##### IMPORT SLOW ZONES DATA
zones = gpd.read_file("zones_30/zones-30.shp")
print(zones.crs) # EPSG:4326 

EPSG:4326


In [None]:
##### IMPORT IRIS DATA
zones = gpd.read_file("zones_30/zones-30.shp")
print(zones.crs) # EPSG:4326 

## 1. Creating the geodataframes

In [64]:
# From points in the 2005-2016 Paris data (original CRS is WGS84 (EPSG:4326))
acc1 = gpd.GeoDataFrame(acc1,
                        crs = "EPSG:4326",
                       geometry = gpd.points_from_xy(x = acc1["longitude"], y = acc1["latitude"]))

## 2. Assigning accidents to: IRIS AND SLOW ZONES

### 2005 - 2016

In [65]:
# Create a new variable in acc1 (the Paris 2005-2016 accidents database)
# Slow zones
acc1.insert(67, "slow_zone", pd.Series([], dtype = "object"))
acc1.insert(68, "slow_zone_year", pd.Series([], dtype = "object"))
acc1.insert(69, "slow_zone_d", pd.Series([], dtype = "object"))

# IRIS
acc1.insert(70, "iris", pd.Series([], dtype = "object"))
acc1.insert(71, "iris_code", pd.Series([], dtype = "object"))
acc1.insert(72, "iris_name", pd.Series([], dtype = "object"))

In [66]:
%%time
# Classifying SLOW ZONES
# There are 9 zones with missing geometries, so these ones will not have any transaction inside
zones_non_na = zones[zones["geometry"].notna()].copy()

for i in zones_non_na.index:
    acc1.loc[acc1["geometry"].within(zones.loc[i, "geometry"]), "slow_zone"] = zones.loc[i, "nom_zca"]
    acc1.loc[acc1["geometry"].within(zones.loc[i, "geometry"]), "slow_zone_year"] = zones.loc[i, "year"]
    acc1.loc[acc1["geometry"].within(zones.loc[i, "geometry"]), "slow_zone_name"] = zones.loc[i, "year"]
    
# Add a dummy equal to 1 if inside a slow zone
acc1["slow_zone_d"] = 0
acc1.loc[acc1["slow_zone"].notna(), "slow_zone_d"] = 1

CPU times: user 36.9 s, sys: 114 ms, total: 37 s
Wall time: 37.1 s


In [68]:
# Evaluate the results
acc1["slow_zone_d"].describe() # 24.14 of the accidents happen inside slow zones

count    76399.000000
mean         0.241417
std          0.427945
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: slow_zone_d, dtype: float64

In [77]:
len(acc1["slow_zone"].unique()) # so, given there are 133 zones  with geometry, all of them have at least 1 accident

133

In [None]:
%%time
# Classifying IRIS

for i in iris.index:
    acc1.loc[dv3f["centroid"].within(iris.loc[i, "geometry"]), "iris"] = iris.loc[i, "IRIS"]
    acc1.loc[dv3f["centroid"].within(iris.loc[i, "geometry"]), "iris_code"] = iris.loc[i, "CODE_IRIS"]
    acc1.loc[dv3f["centroid"].within(iris.loc[i, "geometry"]), "iris_name"] = iris.loc[i, "NOM_IRIS"]

In [79]:
iris19.head(9)

Unnamed: 0,INSEE_COM,NOM_COM,IRIS,CODE_IRIS,NOM_IRIS,TYP_IRIS,geometry
0,72191,Mayet,0,721910000,Mayet,Z,"POLYGON ((497887.400 6747662.400, 497907.200 6..."
1,77248,Lesches,0,772480000,Lesches,Z,"POLYGON ((685757.700 6868592.300, 685832.500 6..."
2,51426,Péas,0,514260000,Péas,Z,"POLYGON ((757292.900 6847598.400, 757334.300 6..."
3,81199,Padiès,0,811990000,Padiès,Z,"POLYGON ((646146.900 6328153.200, 646124.900 6..."
4,59225,Feignies,102,592250102,Sud,H,"POLYGON ((767596.000 7022269.100, 767242.200 7..."
5,60397,Le Mesnil-Conteville,0,603970000,Le Mesnil-Conteville,Z,"POLYGON ((633388.600 6951720.500, 633333.400 6..."
6,38382,Saint-Égrève,104,383820104,Barnave-Saint-Robert,H,"POLYGON ((910055.200 6463692.500, 910060.100 6..."
7,14426,Le Mesnil-sur-Blangy,0,144260000,Le Mesnil-sur-Blangy,Z,"POLYGON ((499002.400 6909564.300, 499052.700 6..."
8,34317,La Vacquerie-et-Saint-Martin-de-Castries,0,343170000,La Vacquerie-et-Saint-Martin-de-Castries,Z,"POLYGON ((735477.100 6304551.300, 735491.800 6..."
