# Mapping Police Violence

## Reading Point data

In [51]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd


%matplotlib notebook

PID_Table = pd.read_csv('https://raw.githubusercontent.com/Police-Involved-Deaths-CA/Data/main/Spatial_Data/Geocoded_Points_not_Complete/PID_locations.csv')
PID_Table

Unnamed: 0,date,INDEX,id_victim,first_name,last_name,middle_name,age,gender,race,prov,...,Temp_Date,summary,ds_rank,Type,latitude,longitude,geocoding_attempt,geocoding_return,geocoding_notes,geocode_city
0,2000-01-30,238,0413_V1,Stuart,Mitchell,,49.0,Male,Not Specified,ON,...,2000.001,,,Police Killing,43.658858,-79.365514,Winchester & Parliament Toronto ON,"Parliament Street, Toronto, Ontario M5A 2Z6, C...",Passed,Toronto
1,2000-05-31,243,0440_V1,Carl,Ouellet,,34.0,Male,Not Specified,QC,...,2000.005,,,Police Killing,45.522990,-73.582297,Drolet St. Montreal QC,"Rue Drolet, Montréal, Quebec H2W 2L9, Canada",Passed,Montréal
2,2000-07-16,245,0439_V1,Luc,Aubert,,43.0,Male,Not Specified,QC,...,2000.007,,,Police Killing,45.523431,-73.554714,1580 Avenue Papineau Montreal QC,"1580 Avenue Papineau, Montréal, Quebec H2K 4H8...",Passed,Montréal
3,2000-07-17,246,0254_V1,Larry,Campbell,,52.0,Male,Not Specified,ON,...,2000.007,,,Police Killing,42.986913,-81.230434,Adelaide St North & Oxford St East London ON,"Adelaide Street, London, Ontario N6B 3H5, Canada",Passed,London
4,2000-07-18,247,0438_V1,Sãbastien,McNicoll,,26.0,Male,Not Specified,QC,...,2000.007,,,Police Involved Death,45.589869,-73.598522,8595 Lacordaire Blvd. Montreal QC,"8595 Boulevard Lacordaire, Montréal, Quebec H1...",Passed,Montréal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2019-08-02,1358,0599_V1,Viengxay,Chommany,,42.0,Male,Not Specified,MB,...,,"An unidentified man, 42, was Tasered as he fle...",1.0,Police Killing,49.914363,-97.076043,Consol Avenue Winnipeg MB,"Consol Avenue, Winnipeg, Manitoba R2K 1T4, Canada",Passed,Winnipeg
607,2019-05-02,1360,0601_V1,Not Specified,,,31.0,Male,Not Specified,ON,...,,"Responding to calls of a disturbance, police e...",1.0,Police Killing,45.606420,-74.590056,Abbott Street Hawkesbury ON,"Abbott Street, Hawkesbury, Ontario K6A 2C9, Ca...",Passed,Hawkesbury
608,2009-12-09,1377,MS_22,Johhny,Dawson,Bingo,,Male,Indigenous,BC,...,,,,Police Killing,49.281997,-123.098235,238 E Cordova St Vancouver BC,"238 East Cordova Street, Vancouver, British Co...",Passed,Vancouver
609,2020-10-08,1382,MS_27,Jackson,Diggle,,17.0,Male,Not Specified,BC,...,,,,Police Killing,49.160000,-122.780000,Fleetwood Surrey BC,"Fleetwood, Surrey, British Columbia, Canada",Passed,Surrey


# parse the data & convert to a geodataframe

In [52]:
PID_BC = PID_Table.loc[PID_Table['prov']=='BC']
PID_BC = gpd.GeoDataFrame(
                    PID_BC,
                    geometry=gpd.points_from_xy(PID_BC.longitude, PID_BC.latitude),
                    crs='WGS1984'
                        )

PID_BC.plot()
PID_BC.crs

<IPython.core.display.Javascript object>

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Read the Census Data, Re-project the points


In [53]:
# BC_Sub_Div = gpd.read_file('data/Census/SimplyAnalytics_Shapefiles_2021-11-07_04_12_18_e28bde0952164f8aa648e288cab05f1b.shp')
# BC_Sub_Div=BC_Sub_Div.to_crs('EPSG:3005')

BC_Sub_Div = gpd.read_file("data/Outputs/BC_Sub_Div.json", driver = "GeoJSON")

BC_Sub_Div.crs

PID_BC = PID_BC.to_crs(BC_Sub_Div.crs)

fig,ax=plt.subplots(figsize=(8,8))
BC_Sub_Div.plot(ax=ax)
PID_BC.plot(ax=ax,color='r',edgecolor='k')


ax.set_ylim(PID_BC.geometry.y.min()-5e4,PID_BC.geometry.y.max()+5e4)
ax.set_xlim(PID_BC.geometry.x.min()-5e4,PID_BC.geometry.x.max()+5e4)
ax.set_title('Police Involved Deaths 2016 - Present')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Police Involved Deaths 2016 - Present')

# Vector Overlay


Lets import a points layer for some locations in BC and walk through a handful of vector overlay methods.


## Spatial Joins

We can use a [spatial join](https://geopandas.org/gallery/spatial_joins.html) to merge attributes between two layers based on location

In [54]:
fig,ax=plt.subplots(figsize=(8,8))

# Changin how to "right" will significantly increase the runtime
# and duplicate each province multiple times (once for each incident within it)
Test_Join = gpd.sjoin(PID_BC, BC_Sub_Div, how="left") 


BC_Sub_Div.plot(ax=ax)
## See if there are any locations "outside" the provincial boundaries
Test_Join.loc[Test_Join['name_right'].isnull()==False].plot(ax=ax,color='k',label='Joined')
Test_Join.loc[Test_Join['name_right'].isnull()].plot(ax=ax,color='r',label='Not Joined')

ax.legend()

Out = Test_Join.loc[Test_Join['name_right'].isnull()]

ax.set_ylim(PID_BC.geometry.y.min()-5e4,PID_BC.geometry.y.max()+5e4)
ax.set_xlim(PID_BC.geometry.x.min()-5e4,PID_BC.geometry.x.max()+5e4)
ax.set_title('Police Involved Deaths 2016 - Present')

print('Not Joined: ',Test_Join.loc[Test_Join['name_right'].isnull()].count()['INDEX'])

Test_Join.head()

<IPython.core.display.Javascript object>

Not Joined:  0




Unnamed: 0,date,INDEX,id_victim,first_name,last_name,middle_name,age,gender,race,prov,...,index_right,spatial_id,name_right,Indigenous Identity,Population,Visible Minority,Indigenous_Pct,Pop_Density,Community Type,Community_Type
6,2000-08-21,250,0267_V1,Mladen,Cujko,,33.0,Male,Not Specified,BC,...,190,5917044,"Langford, BC (CSD)",2636.0,42691.0,4944.0,6.0,1043.702367,,Medium Poulation Center
18,2002-05-12,303,0266_V1,Paulsey,Alphonse,,67.0,Male,Indigenous,BC,...,496,5941009,"Williams Lake, BC (CSD)",3739.0,11397.0,788.0,33.0,314.577224,Rural,Rural
21,2002-08-01,309,0269_V1,Jeffrey,Berg,Michael,37.0,Male,White,BC,...,147,5915022,"Vancouver, BC (CSD)",16168.0,694440.0,371938.0,2.0,5964.104048,,Large Urban Center
24,2002-09-27,317,0479_V1,Darrell,Paquette,Raymond,43.0,Male,Not Specified,BC,...,698,5953023,"Prince George, BC (CSD)",13842.0,82094.0,7823.0,17.0,248.456005,Rural,Rural
27,2003-07-14,339,0277_V1,Keyvan,Tabesh,,18.0,Male,White,BC,...,154,5915043,"Port Moody, BC (CSD)",893.0,39217.0,13328.0,2.0,1485.877021,,Medium Poulation Center


## Point In Polygon Analysis

The spatial join method is useful in some cases, but for others, it produces a lot of redundancy.  If your goal is to calculate the number of points per polygon, we can do a point in polygon analysis using the [.within()](https://geopandas.org/docs/reference/api/geopandas.GeoSeries.within.html#geopandas.GeoSeries.within) method. 

In [55]:
BC_Sub_Div['Deaths'] = 0.0
for i,row in BC_Sub_Div.iterrows():
#     print(i)
    pip = PID_BC.within(row['geometry'])
#     print(pip)
    if pip.sum()>0:
        BC_Sub_Div.loc[BC_Sub_Div.index==i,'Deaths']+=pip.sum()
print(BC_Sub_Div['Deaths'].describe())

count    737.000000
mean       0.172320
std        1.099107
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max       19.000000
Name: Deaths, dtype: float64


In [94]:
BC_Sub_Div['Population'].describe()

BC_Sub_Div['Mask']=0
BC_Sub_Div.loc[BC_Sub_Div['Population']*.2<30,'Mask']=1

print(BC_Sub_Div.groupby('Mask').count())

      spatial_id  name  Indigenous Identity  Population  Visible Minority  \
Mask                                                                        
0            437   437                  437         437               437   
1            300   300                  300         300               300   

      Indigenous_Pct  Pop_Density  Community Type  Community_Type  geometry  \
Mask                                                                          
0                437          437             373             437       437   
1                258          300             300             300       300   

      Deaths  Kernel_Density  KD_Norm  
Mask                                   
0        437             396      396  
1        300             187      187  


In [56]:
fig,ax=plt.subplots(figsize=(8,8))
BC_Sub_Div.plot(column='Deaths',ax=ax,legend=True,scheme="User_Defined", 
         classification_kwds=dict(bins=[0,1,2,4,
             BC_Sub_Div['Deaths'].max()]),
                    edgecolor='k')


ax.set_ylim(PID_BC.geometry.y.min()-5e4,PID_BC.geometry.y.max()+5e4)
ax.set_xlim(PID_BC.geometry.x.min()-5e4,PID_BC.geometry.x.max()+5e4)

# BC_Sub_Div_Clip.to_file('data/BC_Sub_Divisions.shp')

<IPython.core.display.Javascript object>

(776693.5463433585, 1786914.951293358)

In [96]:

Sel = BC_Sub_Div.loc[BC_Sub_Div['Mask']==0]

plt.figure()
plt.scatter(Sel['Indigenous_Pct'],Sel['Deaths'])

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x2ab86829970>


## Kernel Density

In [57]:
from sklearn.neighbors import KernelDensity
import pandas as pd
import geopandas as gpd
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import rasterio as rio
from rasterio.plot import show

def kde2D(x, y, bandwidth, cell_size=1e3, **kwargs): 
    """Build 2D kernel density estimate (KDE)."""
    # Transform ipnut points to x,y pairs
    xy_train  = np.vstack([y, x]).T
    # Fit the kernel density model
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(xy_train)

    """Construct the Output Image"""
    # Our "Null" hypothesis is a uniform 2D distribution - create a 2D grid
    # Subtract/Add the cell size to the min/max the intervals are fully inclusive of the feature space
    x_ax = np.arange(x.min()-cell_size,x.max()+cell_size,cell_size)
    y_ax = np.arange(y.min()-cell_size,y.max()+cell_size,cell_size)
    xx, yy = np.meshgrid(x_ax,y_ax)
    
    # Transform the grid points to x,y pairs
    xy_test = (np.vstack([yy.ravel(), xx.ravel()]).T)
    
    # score_samples() returns the log-likelihood of the samples
    # convert units to the cell size (z will ~ sum to 1 after conversion)
    z = np.exp(kde_skl.score_samples(xy_test))*cell_size**2
    return xx, yy, np.reshape(z, xx.shape)


x = PID_BC.geometry.x.values
y = PID_BC.geometry.y.values

xx, yy, zz = kde2D(x, y,
                   5e3,# 5km band width
                   cell_size=1e3, #1km cell size
                   kernel='linear')

print(zz.min(),zz.max(),zz.mean(),(zz).sum())

fig,ax=plt.subplots(figsize=(10,10))
cb = ax.pcolormesh(xx, yy, zz,shading='auto')

G = PID_BC.groupby(['latitude','longitude']).first()[['geometry','address_intersection','city_town']]
G['count']=PID_BC.groupby(['latitude','longitude']).count()['INDEX']

ax.scatter(G.geometry.x, G.geometry.y, s=100,edgecolor='k',c=G['count'], facecolor='k',cmap='Reds')#,legend=True)
# ax.legend()
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)

plt.colorbar(cb, cax=cax)



0.0 0.0031357776637190348 1.1623636394508778e-06 0.9880125806241644


<IPython.core.display.Javascript object>

<matplotlib.colorbar.Colorbar at 0x2abe0870ca0>

In [61]:
# print(G.sort_values(by='count'))

print(PID_BC.loc[PID_BC['latitude']==49.283941])

Empty GeoDataFrame
Columns: [date, INDEX, id_victim, first_name, last_name, middle_name, age, gender, race, prov, department, cause_death, circumstances_of_death, data_source, extra_source, link, name, city_town, address_intersection, postal_code, ethnic_ancestry, immigrant_refugee_naturalized, charges, officers involved, armed_type, taser_deployed, injured_officer, excited_delirium, mentral_distress_disorder, substance_abuse, charge_type, Comp, id_incident, KCC_posts, ID, Temp_Date, summary, ds_rank, Type, latitude, longitude, geocoding_attempt, geocoding_return, geocoding_notes, geocode_city, geometry]
Index: []

[0 rows x 46 columns]


In [62]:

import rasterio as rio
from rasterio.plot import show
trans = rio.transform.from_bounds(xx.min(), yy.min(), xx.max(), yy.max(), int(xx.shape[0]), int(yy.shape[1]))

print(xx.min(), yy.min(), xx.max(), yy.max())
print(trans)

with rio.open('example.tif', 'w',
                  dtype=rio.float32,
                  count=1,
                  compress='lzw',
                  width=int(xx.shape[0]),
                  height=int(yy.shape[1]),
                  transform=trans) as dst:
    dst.write(np.flip(zz,axis=0).astype(rio.float32), 1)
fig,ax=plt.subplots()
with rio.open('example.tif','r') as Test:
    show(Test,ax=ax)
    array=Test.read()[0]

825693.5463433585 380247.4026558375 1737693.5463433585 1310247.4026558376
| 979.59, 0.00, 825693.55|
| 0.00,-1018.62, 1310247.40|
| 0.00, 0.00, 1.00|


<IPython.core.display.Javascript object>

In [65]:
array.shape
from rasterstats import zonal_stats

In [82]:
BC_Sub_Div['Kernel_Density']=np.nan
for i,row in BC_Sub_Div.iterrows():
    ZS = zonal_stats(row.geometry, array, affine=trans, stats=['min', 'max', 'mean', 'median', 'majority'])
    if ZS[0]['max'] != None:
        BC_Sub_Div.loc[BC_Sub_Div.index==i,'Kernel_Density']=ZS[0]['max']
#         print(ZS[0]['max'])
            
BC_Sub_Div['KD_Norm'] = (BC_Sub_Div['Kernel_Density']-BC_Sub_Div['Kernel_Density'].mean())/BC_Sub_Div['Kernel_Density'].std()

In [84]:
fig,ax=plt.subplots(figsize=(8,8))
BC_Sub_Div.plot(column='KD_Norm',ax=ax,legend=True,scheme="User_Defined", 
         classification_kwds=dict(
             bins=[BC_Sub_Div['KD_Norm'].min(),BC_Sub_Div['KD_Norm'].quantile(.25),
                   BC_Sub_Div['KD_Norm'].median(),BC_Sub_Div['KD_Norm'].quantile(.75),
             BC_Sub_Div['KD_Norm'].max()]),
                    edgecolor='k')

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [89]:
plt.figure()
plt.scatter(BC_Sub_Div['Indigenous_Pct'],BC_Sub_Div['KD_Norm'])

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x2abe6e9db80>