In [2]:
# EDA on the CAD-combined.csv file.  Portland PD Dispatch data from 2012 to September 2019.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

plt.style.use('ggplot')
font = {'weight': 'bold',
        'size':   16}
plt.rc('font', **font)

# Date ranges from April 2012 to September 2019

In [3]:
# Read the combined dispatch CSV into the working frame `df`.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unchanged on a machine that has the file at this exact location.
filepath = '/Users/will/Desktop/Portland/calls_for_service/CAD-combined.csv'
df = pd.read_csv(filepath_or_buffer=filepath)

In [4]:
# Remove the leftover 'Unnamed: 0' column if it is present.
# errors='ignore' makes this a no-op when the column is already gone, so the
# cell can be re-run without raising KeyError; rebinding `df` (instead of
# inplace=True) keeps the step free of hidden in-place mutation.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [5]:
# Population data: one figure per year, listed newest first (2018 back to 2012).
# Keeping year and figure side by side prevents the two lists drifting apart.
population_by_year = {
    2018: 653115,
    2017: 648530,
    2016: 643136,
    2015: 631539,
    2014: 620647,
    2013: 609970,
    2012: 604285,
}
years = list(population_by_year)
population = list(population_by_year.values())

In [9]:
# Run the project's src/functions.py so that the names it defines (including
# make_datetime_column) become available in this notebook.
# NOTE(review): absolute, user-specific path -- this cell only works on a
# machine with that directory layout.
%run /Users/will/dsi/PortlandPD/src/functions.py

# Name of the column that holds the report month/year values.
col_with_dates = 'ReportMonthYear'
# make_datetime_column is defined outside this notebook; presumably it returns
# the frame with `col_with_dates` converted to datetimes -- TODO confirm in
# src/functions.py.
# NOTE(review): `df` is rebound to the helper's result, so re-running this cell
# passes the already-processed frame back into the helper.
df = make_datetime_column(df, col_with_dates)
# Show the resulting frame as the cell output.
df

Unnamed: 0,Address,CallNumber,FinalCallCategory,FinalCallGroup,Neighborhood,OpenDataLat,OpenDataLon,OpenDataX,OpenDataY,Priority,ReportMonthYear,ResponseTime_sec,TimeInQueue_sec,TravelTime_sec
111217,7500 BLOCK OF SE REEDWAY ST,93550832,Disturbance,Disorder,Mt Scott-Arleta,45.481633,-122.585407,7667285.0,668593.0,High,2012-01-01,326,27,299
75146,4000 BLOCK OF NE FREMONT ST,89697416,School Incident,Disorder,Beaumont-Wilshire,45.548340,-122.621264,7658730.0,693146.0,Low,2012-01-01,1470,261,1209
151704,N DENVER AVE / N LOMBARD ST,95401674,DUII,Traffic,Kenton,45.577129,-122.686866,7642214.0,704087.0,Medium,2012-01-01,421,79,342
151703,N DENVER AVE / N LOMBARD ST,93109414,Welfare Check,Disorder,Kenton,45.577129,-122.686866,7642214.0,704087.0,Medium,2012-01-01,499,353,146
151702,N DENVER AVE / N LOMBARD ST,91547714,Welfare Check,Disorder,Kenton,45.577129,-122.686866,7642214.0,704087.0,Medium,2012-01-01,250,51,199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1600852,1200 BLOCK OF NE 2ND AVE,2785432922,Disturbance,Disorder,Lloyd,45.532557,-122.663756,7647694.0,687682.0,High,2019-11-01,310,84,226
1600851,1200 BLOCK OF NE 2ND AVE,2785306217,Follow Up,Other,Lloyd,45.532557,-122.663756,7647694.0,687682.0,Low,2019-11-01,7770,7669,101
1600850,1200 BLOCK OF NE 2ND AVE,2785305616,Missing Person,Civil,Lloyd,45.532557,-122.663756,7647694.0,687682.0,Low,2019-11-01,8644,8195,449
1660843,4600 BLOCK OF SE 63RD AVE,2777856617,Disturbance,Disorder,Mt Scott-Arleta,45.489299,-122.597918,7664150.0,671470.0,Low,2019-11-01,3419,2754,665


In [10]:
# Order the calls by reporting month (earliest first) and preview the first five rows.
df = df.sort_values('ReportMonthYear', ascending=True)
df.head(n=5)

Unnamed: 0,Address,CallNumber,FinalCallCategory,FinalCallGroup,Neighborhood,OpenDataLat,OpenDataLon,OpenDataX,OpenDataY,Priority,ReportMonthYear,ResponseTime_sec,TimeInQueue_sec,TravelTime_sec
111217,7500 BLOCK OF SE REEDWAY ST,93550832,Disturbance,Disorder,Mt Scott-Arleta,45.481633,-122.585407,7667285.0,668593.0,High,2012-01-01,326,27,299
42733,1700 BLOCK OF NE 42ND AVE,88798116,Alarm,Alarm,Hollywood,45.535628,-122.61986,7658968.0,688503.0,Low,2012-01-01,495,357,138
42414,1600 BLOCK OF SW MORRISON ST,92459334,Unwanted Person,Disorder,Goose Hollow,45.5218,-122.688622,7641218.0,683933.0,Medium,2012-01-01,384,23,361
110551,7500 BLOCK OF N HEPPNER AVE,91762322,Threat,Crime,St Johns,45.58752,-122.736909,7629510.0,708225.0,High,2012-01-01,185,24,161
42465,1600 BLOCK OF SW SUNSET BLVD,94520716,Alarm,Alarm,Hillsdale,45.480406,-122.695086,7639152.0,668891.0,Low,2012-01-01,872,92,780


In [11]:
# Question: what happened to drug-related calls after marijuana was legalized?
# Drug violations are recorded with FinalCallGroup = Crime and
# FinalCallCategory = Vice.

# Keep only the rows categorized as Vice (potentially drug-related calls).
vc = df.loc[df['FinalCallCategory'].eq('Vice')]

In [12]:
# Overall proportion of vice calls across the whole dataset.
vcs = len(vc)  # number of vice calls
dfs = len(df)  # number of calls of any category
print(f'Number of vice calls: {vcs}, number of dfs: {dfs}')

# Two views of the same proportion: total calls per vice call, and the vice
# share of all calls. Both are kept as variables for later use.
one_vice_per_call = dfs/vcs
ratio_of_vice_overall = vcs/dfs
# A bare `ratio_of_vice_overall` expression used to sit here; mid-cell it
# displayed nothing (only a cell's last expression is shown), so it was removed.
print(f'There is 1 vice call per ~{one_vice_per_call:0.1f} of total calls')

Number of vice calls: 23531, number of dfs: 1781773
There is 1 vice call per ~75.7 of total calls


In [13]:
# Group the calls into pre/post periods around the two legalization dates.
# The cutoffs are named once here instead of being repeated in every filter.
WEED_USE_LEGAL_DATE = '2015-07-01'    # weed legal to USE
WEED_STORE_LEGAL_DATE = '2015-10-01'  # weed legal to SELL


def _split_at(calls, cutoff, date_col='ReportMonthYear'):
    """Split `calls` into the rows before `cutoff` and the rows on or after it.

    Parameters
    ----------
    calls : DataFrame containing `date_col`.
    cutoff : value compared against `date_col` (here an ISO date string).
    date_col : name of the column to compare; defaults to 'ReportMonthYear'.

    Returns
    -------
    (before, on_or_after) : tuple of two DataFrames.

    Both masks are evaluated explicitly rather than negating one of them, so
    rows whose date is missing land in neither subset, exactly as with the
    original `<` / `>=` filters.
    """
    dates = calls[date_col]
    return calls[dates < cutoff], calls[dates >= cutoff]


# Split at the date weed became legal to use.
vc_preweeduselegal, vc_postweeduselegal = _split_at(vc, WEED_USE_LEGAL_DATE)
df_preweeduselegal, df_postweeduselegal = _split_at(df, WEED_USE_LEGAL_DATE)

# Split at the date weed became legal to sell.
vc_preweedstorelegal, vc_postweedstorelegal = _split_at(vc, WEED_STORE_LEGAL_DATE)
df_preweedstorelegal, df_postweedstorelegal = _split_at(df, WEED_STORE_LEGAL_DATE)

In [14]:
# Print the vice-call count, the total-call count and the calls-per-vice-call
# figure for the periods before and after weed became legal to use.
for heading, vice_calls, all_calls in (
    ('Pre weed-use:', vc_preweeduselegal, df_preweeduselegal),
    ('Post weed-use:', vc_postweeduselegal, df_postweeduselegal),
):
    n_vice, n_total = len(vice_calls), len(all_calls)
    print(heading)
    print(f'Number of vice calls: {n_vice}, total calls: {n_total}.')
    print(f'Roughly 1 vice call per ~{n_total/n_vice:0.1f} total calls.')

Pre weed-use:
Number of vice calls: 10295, total calls: 709272.
Roughly 1 vice call per ~68.9 total calls.
Post weed-use:
Number of vice calls: 13236, total calls: 1072501.
Roughly 1 vice call per ~81.0 total calls.


In [15]:
# Print the vice-call count, the total-call count and the calls-per-vice-call
# figure for the periods before and after weed became legal to sell.
for heading, vice_calls, all_calls in (
    ('Pre weed-store:', vc_preweedstorelegal, df_preweedstorelegal),
    ('Post weed-store:', vc_postweedstorelegal, df_postweedstorelegal),
):
    n_vice, n_total = len(vice_calls), len(all_calls)
    print(heading)
    print(f'Number of vice calls: {n_vice}, total calls: {n_total}.')
    print(f'Roughly 1 vice call per ~{n_total/n_vice:0.1f} total calls.')

Pre weed-store:
Number of vice calls: 11194, total calls: 774116.
Roughly 1 vice call per ~69.2 total calls.
Post weed-store:
Number of vice calls: 12337, total calls: 1007657.
Roughly 1 vice call per ~81.7 total calls.


In [2]:
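# Vice calls became a smaller share of all calls after both dates: the share
# fell by roughly 15% (1 in ~68.9 -> 1 in ~81.0 around legal use; 1 in ~69.2 ->
# 1 in ~81.7 around legal sales). Equivalently, the number of total calls per
# vice call rose by about 17.6% and 18.1%. The raw vice-call counts themselves
# are higher in the post periods (10295 -> 13236 and 11194 -> 12337).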

In [17]:
# Build an alphabetically sorted list of neighborhood names with missing
# values excluded.
# `neighborhood_array` keeps every distinct value, including NaN when some
# rows have no neighborhood.
neighborhood_array = df['Neighborhood'].unique()
# Filter missing values with pd.notna instead of stringifying and calling
# list.remove('nan'), which raised ValueError whenever the column had no NaN.
neighborhood_list = sorted(
    str(name) for name in neighborhood_array if pd.notna(name)
)
