# SOUTH CAROLINA POLICE SHOOTING DATA

Code to import necessary dependencies 

In [1]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models.tools import HoverTool
from bokeh.models import ColumnDataSource
from bokeh.models.formatters import NumeralTickFormatter

import os
import sqlite3
import csv
import datetime
import math

import pandas as pd
import numpy as np

#read the CSV
df = pd.read_csv('compiled_data.csv')

#connect to a database
conn = sqlite3.connect("policeData.db") #if the db does not exist, this creates a policeData.db file in the current directory

#store table in the database:
df.to_sql('data', conn,  if_exists='replace')

output_notebook()



#### Clean up some of the data

In [2]:
#converts opened and time columns to datatime
df[['opened', 'time']] = df[['opened', 'time']].apply(pd.to_datetime)

#replaces NaN with 0
df[['officersInjured', 'officersKilled', 'suspectsInjured', 'suspectsKilled']] = df[['officersInjured', 'officersKilled', 'suspectsInjured', 'suspectsKilled']].fillna(0)

#replaces empty cells with 0
df['officersInjured'].replace(' ', 0, inplace=True)
df['officersKilled'].replace(' ', 0, inplace=True)
df['suspectsInjured'].replace(' ', 0, inplace=True)
df['suspectsKilled'].replace(' ', 0, inplace=True)

#converts columns to int
df[['officersInjured', 'officersKilled', 'suspectsInjured', 'suspectsKilled']] = df[['officersInjured', 'officersKilled', 'suspectsInjured', 'suspectsKilled']].astype(int)

#creates consistent county names
df['county'].replace('(ORANGEBURG)', 'ORANGEBURG', inplace=True)
df['county'].replace(['YORK ', 'YORK *'], 'YORK', inplace=True)
df['county'] = df['county'].str.upper()

#creates consistent agency names
df['agency'] = df['agency'].fillna('unknown')
df['agency'].replace(to_replace=' COUNTY ', value=' CO ', regex=True, inplace=True)
df['agency'].replace(to_replace=' METH COL ', value=' Methodist College ', regex=True, inplace=True)
df['agency'].replace(to_replace=' PD', value=' police department', regex=True, inplace=True)
df['agency'].replace(to_replace=' PS', value=' public safety', regex=True, inplace=True)
df['agency'].replace(to_replace=' PD ', value=' police department ', regex=True, inplace=True)
df['agency'].replace(to_replace=' SO', value=' county sheriff office', regex=True, inplace=True)
df['agency'].replace(to_replace=' CO', value=' county', regex=True, inplace=True)
df['agency'].replace(to_replace=' CO ', value=' county ', regex=True, inplace=True)
df['agency'].replace(to_replace='PPP', value='proabtion parole and pardon services', regex=True, inplace=True)
df['agency'].replace(to_replace='SCDC', value='South Carolina Department of Corrections', regex=True, inplace=True)
df['agency'].replace(to_replace='USMS', value='U.S. Marshals Service', regex=True, inplace=True)
df['agency'].replace(to_replace=' DPS ', value=' department of public safety ', regex=True, inplace=True)
df['agency'].replace(to_replace=' DPS', value=' department of public safety', regex=True, inplace=True)
df['agency'].replace(to_replace=' DEPT. PUB. ', value=' department of public safety ', regex=True, inplace=True)
df['agency'].replace(to_replace=' DEPT. ', value=' department ', regex=True, inplace=True)
df['agency'].replace(to_replace='DNR', value='department of natural resources', regex=True, inplace=True)
df['agency'].replace(to_replace='MT ', value='MT. ', regex=True, inplace=True)
df['agency'].replace(to_replace='N. ', value='NORTH ', regex=True, inplace=True)
df['agency'].replace(to_replace='US ', value='U.S. ', regex=True, inplace=True)
df['agency'].replace(to_replace='US ', value='U.S. ', regex=True, inplace=True)
df['agency'] = df['agency'].str.upper()
df['agency'].replace(to_replace=' COUNTY COUNTY ', value=' COUNTY ', regex=True, inplace=True)
df['agency'].replace(to_replace='BERKELEY COUNTY SHERIFF OFFICE ', value='BERKELEY COUNTY SHERIFF OFFICE', regex=True, inplace=True)
df['agency'].replace(to_replace='YORK COUNTY SHERIFF OFFICE ', value='YORK COUNTY SHERIFF OFFICE', regex=True, inplace=True)
df['agency'].replace(to_replace='COUNTYRRECTIONS', value='CORRECTIONS', regex=True, inplace=True)
df['agency'].replace(to_replace='SCHP', value='SOUTH CAROLINA HIGHWAY PATROL', regex=True, inplace=True)
df['agency'].replace('(DEA *****)', 'DEA', inplace=True)
df['agency'].replace('SLED - S/A AARON RAWL', 'SLED', regex=True, inplace=True)

#creates consistent offense names
df['offense'].replace(to_replace='unk', value='UNKNOWN', regex=True, inplace=True)
df['offense'] = df['offense'].fillna('unknown')
df['offense'].replace(to_replace='DOMESTIC CALL', value='DOMESTIC', regex=True, inplace=True)
df['offense'].replace(to_replace=' NARCOTICS WARRANT', value='NARCOTICS WARRANT', regex=True, inplace=True)
df['offense'].replace(to_replace='VEHICLE B & E', value='BREAKING & ENTERING (AUTO)', regex=True, inplace=True)
df['offense'].replace(to_replace=['B & E', 'BREAK-IN'], value='BREAKING & ENTERING', regex=True, inplace=True)
df['offense'].replace(to_replace='BARRICATED', value='BARRICADED', regex=True, inplace=True)
df['offense'].replace(to_replace='BURGLARY SUSPECT', value='BURGLARY', regex=True, inplace=True)
df['offense'].replace(to_replace=['DISTURBANCE CALL', 'DISTURBANCE/FIGHT'], value='DISTURBANCE', regex=True, inplace=True)
df['offense'].replace(to_replace='911 HANG UP CALL / OFF RESPONDED', value='911 CALL', regex=True, inplace=True)
df['offense'].replace(to_replace='ALARM CALL/HOME INVASION', value='HOME INVASION', regex=True, inplace=True)
df['offense'].replace(to_replace=['CAR CHASE', 'VEHICLE CHASE', 'SHOPLIFTING/VEHICLE PURSUIT'], value='VEHICLE PURSUIT', regex=True, inplace=True)
df['offense'].replace(to_replace=['SHOTS FIRED CALL', 'SHOOTING COMPLAINT', 'DISTURBANCE/SHOTS FIRED'], value='SHOTS FIRED', regex=True, inplace=True)
df['offense'].replace(to_replace=['SUICIDAL SUBJECT', 'SUICIDE CALL'], value='SUICIDE THREAT', regex=True, inplace=True)
df['offense'].replace(to_replace=['SUSPICIOUS PERSON', 'SUSPICIOUS VEHICLE', 'SUSPECIOUS VEHICLE'], value='SUSPICIOUS ACTIVITY', regex=True, inplace=True)
df['offense'].replace(to_replace=['MENTAL HEALTH PATIENT', 'MENTAL HEALTH TRANSPORT', 'MENTAL PATIENT', 'MENTAL SUBJECT', 'MENTAL SUBJECT/WELFARE CHECK', 'MENTAL HEALTH ISSUES/WELFARE CHECK'], value='MENTAL HEALTH ISSUES', regex=True, inplace=True)
df['offense'].replace(to_replace=['ARREST WARRANT SERVICE', 'BENCH WARRANT', 'DRUG WARRANT ', 'FUGITIVE WARRANT', 'NARCOTICS WARRANT', 'SERVING WARRANT', 'WARRANT SERVICESS'], value='WARRANT SERVICES', regex=True, inplace=True)
df['offense'].replace(to_replace='REPORT OF POSSIBLE ARMED ROBBERY', value='ARMED ROBBERY', regex=True, inplace=True)
df['offense'].replace(to_replace='SUSPECT STEALING CALL', value='THEFT', regex=True, inplace=True)
df['offense'].replace(to_replace=['DRUG COMPLAINT', 'DRUG OPERATION', 'NARCOTICS INVESTIGATION'], value='DRUG INVESTIGATION', regex=True, inplace=True)
df['offense'].replace(to_replace='SHOPLIFTING SUSPECT', value='SHOPLIFTING', regex=True, inplace=True)
df['offense'].replace(to_replace=['TRAFFIC STOP/DRUGS', 'TRAFFIC STOP/STOLEN VEH', 'NARCOTICS/TRAFFIC STOP' ], value='TRAFFIC STOP', regex=True, inplace=True)
df['offense'].replace(to_replace='GANG MEMBER CONFRONT', value='GANG MEMBER CONFRONTATION', regex=True, inplace=True)
df['offense'].replace(to_replace='THREATENING SOMEONE WITH GUN', value='MAN WITH A GUN', regex=True, inplace=True)
df['offense'] = df['offense'].str.upper()

#replaces officerEthnicity data with readable values
df['officerEthnicity'] = df['officerEthnicity'].fillna('unknown')
df['officerEthnicity'].replace('B', 'black', inplace=True)
df['officerEthnicity'].replace('BL', 'black & latino', inplace=True)
df['officerEthnicity'].replace(to_replace=['BW', 'BW ','WB'], value='white & black', inplace=True)
df['officerEthnicity'].replace('L', 'latino', inplace=True)
df['officerEthnicity'].replace(to_replace=['LW', 'WL'], value='white & latino', inplace=True)
df['officerEthnicity'].replace(to_replace=['W', 'W '], value='white', inplace=True)
df['officerEthnicity'] = df['officerEthnicity'].str.upper()

#replaces suspectEthnicity data with readable values
df['suspectEthnicity'] = df['suspectEthnicity'].fillna('unknown')
df['suspectEthnicity'].replace('B', 'black', inplace=True)
df['suspectEthnicity'].replace('BL', 'black & latino', inplace=True)
df['suspectEthnicity'].replace(to_replace=['BW', 'BW ','WB'], value='white & black', inplace=True)
df['suspectEthnicity'].replace('L', 'latino', inplace=True)
df['suspectEthnicity'].replace(to_replace=['LW', 'WL'], value='white & latino', inplace=True)
df['suspectEthnicity'].replace(to_replace=['W', 'W '], value='white', inplace=True)
df['suspectEthnicity'].replace('I', 'indian', inplace=True)
df['suspectEthnicity'] = df['suspectEthnicity'].str.upper()



#### New table showing all the data in the columns that were changed

In [3]:
df[[
    'opened', 
    'county', 
    'agency', 
    'officersInjured', 
    'officersKilled', 
    'suspectsInjured', 
    'suspectsKilled', 
    'offense', 
    'officerEthnicity', 
    'suspectEthnicity']]
    

Unnamed: 0,opened,county,agency,officersInjured,officersKilled,suspectsInjured,suspectsKilled,offense,officerEthnicity,suspectEthnicity
0,2018-01-03,DORCHESTER,NORTH CHARLESTON POLICE DEPARTMENT,0,0,1,0,ARMED ROBBERY,UNKNOWN,UNKNOWN
1,2018-01-11,LEXINGTON,LEXINGTON COUNTY SHERIFF OFFICE,0,0,0,1,TRAFFIC STOP,UNKNOWN,UNKNOWN
2,2018-01-16,YORK,YORK COUNTY SHERIFF OFFICE/ YORK POLICE DEPART...,3,1,1,0,DOMESTIC,UNKNOWN,UNKNOWN
3,2018-01-21,SPARTANBURG,LAURENORTH COUNTY SHERIFF OFFICE,0,0,1,0,STOLEN VEHICLE,UNKNOWN,UNKNOWN
4,2018-01-24,SPARTANBURG,GREER POLICE DEPARTMENT,1,0,1,0,SUSPICIOUS ACTIVITY,UNKNOWN,UNKNOWN
5,2018-02-15,ANDERSON,ANDERSON COUNTY SHERIFF OFFICE,0,0,1,0,TRAFFIC STOP,UNKNOWN,UNKNOWN
6,2018-02-27,CHESTER,CHESTER COUNTY SHERIFF OFFICE / CHESTER POLICE...,0,0,0,1,STOLEN VEHICLE,UNKNOWN,UNKNOWN
7,2018-03-02,BERKELEY,BERKELEY COUNTY SHERIFF OFFICE,1,0,1,0,DOMESTIC,UNKNOWN,UNKNOWN
8,2018-03-15,AIKEN,AIKEN COUNTY SHERIFF OFFICE,0,0,1,0,MENTAL HEALTH ISSUES,UNKNOWN,UNKNOWN
9,2018-03-19,GREENVILLE,GREENVILLE COUNTY SHERIFF OFFICE,0,0,0,1,SUICIDE THREAT,UNKNOWN,UNKNOWN


#### Look at how many suspects where killed by the county

In [4]:
#create a suspects killed by county variable that returns a new data instead of a groupby object
suspects_df = df.groupby(['county'])['suspectsKilled'].sum().reset_index()

#removes any county that did not kill a suspect
suspects_df = suspects_df[suspects_df.suspectsKilled > 0]

#sort the number killed from highest to lowest
suspects_df = suspects_df.sort_values(['suspectsKilled'], ascending=False)

#create a list of unique county names
co = list(suspects_df.county.unique())

#creates data source from the new data frame
s_source = ColumnDataSource(suspects_df)

#create hover tools
hover = HoverTool()
hover.tooltips = """
    <div>
        <h3>@county COUNTY</h3>
        <div><strong>Suspects killed:</strong> @suspectsKilled</div>
    </div>
"""

#add plot
p = figure(
    x_range=co,
    plot_height=600,
    plot_width=1000,
    title="Suspects killed by county",
    x_axis_label="County",
    y_axis_label="Suspects Killed",
    toolbar_location=None,
    tools="")

p.vbar(
    x='county',
    top='suspectsKilled',
    width=0.9,
    color='#009bff',
    #fill_alpha=0.9,
    source=s_source)

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = math.pi/3
p.add_tools(hover)

#show the result
show(p)


#### Look at the number suspects injured by county

In [5]:
#create a suspects injured by county variable that returns a new data instead of a groupby object
suspects_df = df.groupby(['county'])['suspectsInjured'].sum().reset_index()

#removes any county that did not injure a suspect
suspects_df = suspects_df[suspects_df.suspectsInjured > 0]

#sort the number injured from highest to lowest
suspects_df = suspects_df.sort_values(['suspectsInjured'], ascending=False)

#create a list of unique county names
co = list(suspects_df.county.unique())

#creates data source from the new data frame
s_source = ColumnDataSource(suspects_df)

#create hover tools
hover = HoverTool()
hover.tooltips = """
    <div>
        <h3>@county COUNTY</h3>
        <div><strong>Suspects injured:</strong> @suspectsInjured</div>
    </div>
"""

#add plot
p = figure(
    x_range=co,
    plot_height=600,
    plot_width=1000,
    title="Suspects injured by county",
    x_axis_label="County",
    y_axis_label="Suspects Injured",
    toolbar_location=None,
    tools="")

p.vbar(
    x='county',
    top='suspectsInjured',
    width=0.9,
    color='#009bff',
    #fill_alpha=0.9,
    source=s_source)

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = math.pi/3
p.add_tools(hover)

#show the result
show(p)
