In [1]:
import pandas as pd

In [2]:
# Functions
class CleanData:
    def __init__(self, df):
        self.df = df
        
    def get_record(self, name: str):
        return self.df[self.df.name == name]

    def keep_record(self, records: list):
        for record in records:
            self.df.loc[record, 'keep'] = True

In [3]:
# Import data
df = pd.read_csv("fatal-police-shootings-data.csv", index_col="id")

# Add a column to determine if we want to keep the data or not
df["keep"] = True
df.head()

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False,True
4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False,True
5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False,True
8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False,True
9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False,True


In [4]:
len(df)

5424

# Checking Duplicates

In [5]:
# Remove TK TK (TK = to come, https://en.wikipedia.org/wiki/To_come_(publishing))
df_notk = df[df.name != "TK TK"].copy()

# Check for duplicate records in which name and state are the same
duplicated = list(df_notk[df_notk.duplicated(["name", "state"], keep="first")].name)

In [6]:
# Set all duplicate values to keep = False
df.loc[df.name.isin(duplicated), "keep"] = False

# Instantiate CleanData class
clean = CleanData(df)

# Filter the original dataset to look at the duplicated values
duplicated_df = df[df.name.isin(duplicated)].copy()
duplicated_df

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
614,Daniel Hernandez,2015-07-07,shot,gun,47.0,M,H,Bakersfield,CA,False,attack,Not fleeing,False,False
4195,Roderick McDaniel,2018-11-19,shot,gun,33.0,M,B,Magnolia,AR,False,attack,Not fleeing,False,False
4237,Roderick McDaniel,2018-11-20,shot,vehicle,33.0,M,B,Magnolia,AR,False,attack,Not fleeing,False,False
5128,Clayton Andrews,2019-10-25,shot,gun,40.0,M,W,Kansas,OK,False,attack,Not fleeing,False,False
5135,Clayton Andrews,2019-10-26,shot,gun,40.0,M,W,Creek County,OK,True,attack,Car,False,False
5150,Benjamin Diaz,2019-11-01,shot,sharp object,22.0,M,H,Alamogordo,NM,False,attack,Car,False,False
5191,Benjamin Diaz,2019-11-01,shot,box cutter,22.0,M,H,Alamogordo,NM,False,attack,Not fleeing,False,False
5389,Miguel Mercado Segura,2020-01-20,shot,gun,31.0,M,H,Fountain Valley,CA,False,other,Foot,False,False
5515,Miguel Mercado Segura,2020-01-21,shot,gun,31.0,M,H,Fountain Valley,CA,False,other,Foot,False,False
5537,Timothy Leroy Harrington,2020-02-14,shot,gun,58.0,M,W,Polkton,NC,False,other,Car,False,False


In [7]:
# Number of duplicate values
len(duplicated_df.name.unique())

8

In [8]:
name = "Terry Hasty"
# 5572 [ref](https://www.wmbfnews.com/2020/02/25/deputy-suspect-shot-sumter-county-residence/)
clean.keep_record([5572])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5572,Terry Hasty,2020-02-25,shot,gun,56.0,M,,Sumter County,SC,False,attack,Not fleeing,False,True
5603,Terry Hasty,2020-02-25,shot,gun,56.0,M,,Dalzell,SC,False,attack,Not fleeing,False,False


In [9]:
name = "Daniel Hernandez"
# Keep both entries, different dates
# 614 [ref](https://www.bakersfield.com/archives/bakersfield-police-man-fatally-shot-tuesday-pointed-gun-at-officers/article_1fc51179-1fbe-5e1b-8938-a122e0080cb3.html)
# 5782 [ref](https://www.latimes.com/california/story/2020-05-07/lapd-officers-named-in-fatal-shooting-of-alleged-gunman)
clean.keep_record([614, 5782])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
614,Daniel Hernandez,2015-07-07,shot,gun,47.0,M,H,Bakersfield,CA,False,attack,Not fleeing,False,True
5782,Daniel Hernandez,2020-04-30,shot,gun,,M,,Los Angeles,CA,False,other,Car,False,True


In [10]:
name = "Roderick McDaniel"
# Likely 4237 is the correct entry
# 4237 [ref](https://www.arkansasonline.com/news/2018/nov/28/prosecutor-details-magnolia-slayings-20/)

clean.keep_record([4237])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
4195,Roderick McDaniel,2018-11-19,shot,gun,33.0,M,B,Magnolia,AR,False,attack,Not fleeing,False,False
4237,Roderick McDaniel,2018-11-20,shot,vehicle,33.0,M,B,Magnolia,AR,False,attack,Not fleeing,False,True


In [11]:
name = "Clayton Andrews"
# 5128 [ref](https://hl.nwaonline.com/news/2019/oct/30/osbi-investigating-fatal-officer-involv/)

clean.keep_record([5128])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5128,Clayton Andrews,2019-10-25,shot,gun,40.0,M,W,Kansas,OK,False,attack,Not fleeing,False,True
5135,Clayton Andrews,2019-10-26,shot,gun,40.0,M,W,Creek County,OK,True,attack,Car,False,False


In [12]:
name = "Benjamin Diaz"
# NUM [ref](https://kfoxtv.com/news/local/officer-involved-shooting-reported-near-alamogordo)

clean.keep_record([5191])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5150,Benjamin Diaz,2019-11-01,shot,sharp object,22.0,M,H,Alamogordo,NM,False,attack,Car,False,False
5191,Benjamin Diaz,2019-11-01,shot,box cutter,22.0,M,H,Alamogordo,NM,False,attack,Not fleeing,False,True


In [13]:
name = "Miguel Mercado Segura"
# Occured on the Jan 20, 2020
# [ref](https://www.latimes.com/socal/daily-pilot/news/story/2020-01-22/man-killed-in-officer-involved-shooting-in-fountain-valley-is-identified)

clean.keep_record([5515])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5389,Miguel Mercado Segura,2020-01-20,shot,gun,31.0,M,H,Fountain Valley,CA,False,other,Foot,False,False
5515,Miguel Mercado Segura,2020-01-21,shot,gun,31.0,M,H,Fountain Valley,CA,False,other,Foot,False,True


In [14]:
name = "Timothy Leroy Harrington"
# Threat level unclear
# 5537 [ref](https://www.wbtv.com/2020/02/15/deputy-fatally-shoots-armed-suspect-anson-co-sheriff-says/)

clean.keep_record([5537])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5537,Timothy Leroy Harrington,2020-02-14,shot,gun,58.0,M,W,Polkton,NC,False,other,Car,False,True
5548,Timothy Leroy Harrington,2020-02-14,shot,gun,58.0,M,W,Polkton,NC,False,attack,Car,False,False


In [15]:
name = "William Patrick Floyd"
# Occured in Portland
# 5691 [ref](https://www.oregonlive.com/pacific-northwest-news/2020/03/troopers-fatally-shoot-person-on-i-5-south-of-salem.html)

clean.keep_record([5691])
clean.get_record(name)

Unnamed: 0_level_0,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,keep
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5691,William Patrick Floyd,2020-03-27,shot,gun,51.0,M,,Portland,OR,False,other,Not fleeing,False,True
5721,William Patrick Floyd,2020-03-27,shot,gun,51.0,M,,Salem,OR,False,other,,False,False


In [17]:
# Keep only the records we want
df = df[df.keep == True]

# Drop the keep column
df.drop(columns=["keep"], axis=0, inplace=True)

AttributeError: 'DataFrame' object has no attribute 'keep'

In [18]:
len(df)

5417

In [19]:
# Export to csv
df.to_csv("fatal-police-shootings-data.csv", index=False)

# Reading

#### AN EMPIRICAL ANALYSIS OF RACIAL DIFFERENCES IN POLICE USE OF FORCE
*Roland G. Fryer, Jr*

https://www.nber.org/papers/w22399.pdf

#### CAN WE PULL BACK FROM THE BRINK
*Sam Harris*

https://samharris.org/can-pull-back-brink/