#### state_postal_file = "CSV_data/state_postal_code.csv"
state,postal_code
Alabama ,AL

#### shootings_file = "CSV_data/fatal_police_shootings_data(redundant).csv"
Victim's age,Victim's gender,Victim's race,Date of Incident (month/day/year),City,State,County,Cause of death,A brief description of the circumstances surrounding the death,Official disposition of death (justified or other),Criminal Charges?,Symptoms of mental illness?,Unarmed/Did Not Have an Actual Weapon,Alleged Weapon (Source: WaPo and Review of Cases Not Included in WaPo Database),Alleged Threat Level (Source: WaPo),Fleeing (Source: WaPo)
44,Male,Unknown race,10/28/2020,Trigg County,KY,Trigg,Gunshot,"Officers were called to the report of a man cutting a hole in the floor of his home with a chainsaw. When an officer arrived, they got into an altercation with Biby. As a result of the altercation, the officer discharged his service weapon, striking Biby. Biby was later pronounced dead at the Trigg County hospital. Sheriff Aaron Acree was allegedly involved in the shooting. ",Pending investigation,No known charges,No,Allegedly Armed,undetermined,undetermined,

#### pres_file = "CSV_data/president_county_candidate.csv"
state,county,candidate,party,voteStates
Delaware,Kent County,Joe Biden,DEM,44518

In [591]:
import sklearn as sk
import pandas as pd
import numpy as np
import csv
import time
import math
import datetime
from collections import defaultdict

In [595]:
# load folders
shootings_file = "CSV_data/fatal_police_shootings_detailed.csv"
pres_file = "CSV_data/president_county_candidate.csv"
pres_file_2016 = "CSV_data/pres16results.csv"
state_postal_file = "CSV_data/state_postal_code.csv"

shootings = pd.read_csv(shootings_file)
presidency = pd.read_csv(pres_file)
presidency2016 = pd.read_csv(pres_file_2016)
state_postal = pd.read_csv(state_postal_file)

In [596]:
# turn all strings in dataframe to lowercase
presidency = presidency.applymap(lambda s:s.lower() if type(s) == str else s)
state_postal = state_postal.applymap(lambda s:s.lower() if type(s) == str else s)
shootings = shootings.applymap(lambda s:s.lower() if type(s) == str else s)
presidency2016 = presidency2016.applymap(lambda s:s.lower() if type(s) == str else s)

In [597]:
state_postal_dic = {row["postal_code"]:row['state']  for i, row in state_postal.iterrows()}

# replace every state postal to state
# al -> alabama
shootings['state'] = shootings['state'].apply(lambda s: state_postal_dic.get(s, ""))
presidency2016['state'] = presidency2016['state'].apply(lambda s: state_postal_dic.get(s, ""))
presidency2016['state']

0       california 
1       california 
2         illinois 
3         illinois 
4            texas 
           ...     
6217         texas 
6218         texas 
6219         texas 
6220         texas 
6221         texas 
Name: state, Length: 6222, dtype: object

In [201]:
# turn all dates to timestamps
s = shootings['date'][0]
def process_date(date):
    if len(date) == 0:
        return 0

    dt = datetime.datetime.strptime(date, "%m/%d/%Y").timetuple()
    return time.mktime(dt)

In [598]:
gender_idx = {cause:i+1 for i,cause in enumerate(set(shootings['gender']))}
race_idx = {cause:i+1 for i,cause in enumerate(set(shootings['race']))}
cause_of_death_idx = {cause:i+1 for i,cause in enumerate(set(shootings['death']))}
charges_idx = {charges:i+1 for i,charges in enumerate(set(shootings['charges']))}
mental_illness_idx = {"yes":0, "no":1, "unknown": 2}
weapon_idx = {weapon:i+1 for i,weapon in enumerate(set(shootings['weapon']))}
weapon_type_idx = {weapon:i+1 for i,weapon in enumerate(set(shootings['weapon_type']))}
threat_idx = {threat:i+1 for i,threat in enumerate(set(shootings['threat']))}

In [599]:
# Fleeing index is a bit different
fleeing_set = set(shootings['fleeing'])
print(fleeing_set)

# We're going to set the data to 0 or 1 based on if he was fleeing or wasn't
def fleeing_idx_func(value):
    if value in ['car', 'foot']:
        return 1
    else:
        return 0

county_id_file = "CSV_data/county_code.csv"
county = pd.read_csv(county_id_file)
county = county.applymap(lambda s:s.lower() if type(s) == str else s)

only_state = county.loc[county['code'] % 1000 == 0]

keys = list(only_state['code'].values)
only_county = county[~county['code'].isin(keys)]


def get_county_code(state_name, county_name):
    state_name = str(state_name).strip()
    county_name = str(county_name).strip()
    if county_name == "nan":
        return 0


    state_code = only_state[only_state['name'].str.contains(state_name)]
    if len(state_code) == 0:
        return 0

    state_code = state_code["code"].values[0]

    name_bol = only_county['name'].str.contains(county_name)
    range_bol = only_county["code"].between(state_code, state_code + 999)
    county_code = only_county[name_bol & range_bol]

    return 0 if len(county_code) == 0 else county_code["code"].values[0]


{nan, 'not fleeing', 'car', 'foot', 'other'}


In [605]:
def election_2020():
    left_count = defaultdict(int)
    right_count = defaultdict(int)
    lead = defaultdict(str)

    left = ["dem"]
    right = ["rep"]


    for index, row in presidency.iterrows():
        state_county = (row["state"], row["county"])

        key = get_county_code(row["state"], row["county"])
        party = row["party"]

        votes = row["voteStates"]

        if party in left:
            left_count[key] += votes
            lead[key] = "dem"
        elif party in right:
            right_count[key] += votes
            lead[key] = "rep"

    return left_count, right_count, lead

def election_2016():
    left_count = defaultdict(int)
    right_count = defaultdict(int)
    lead = defaultdict(str)

    left = ["hillary clinton"]
    right = ["donald trump"]


    for index, row in presidency2016.iterrows():
        state_county = (row["state"], row["county"])

        key = get_county_code(row["state"], row["county"])
        party = row["candidate"]

        votes = row["votes"]

        if party in left:
            left_count[key] += votes
            lead[key] = "dem"
        elif party in right:
            right_count[key] += votes
            lead[key] = "rep"

    return left_count, right_count, lead

left_count_2016, right_count_2016, lead_2016 = election_2016()
left_count, right_count, lead = election_2020()

In [629]:

# TODO if county not specified, find out based on state and city
after_2016_elections = process_date("11/07/2016")
processed_data = []
processed_data_display = []
for i, row in shootings.iterrows():
     state_county = (row["state"], row["county"])

     state_county_ids = get_county_code(row["state"], row["county"])
     state_ids = get_county_code(row["state"], "")

     date = process_date(row["date"])

     # this will skip anything before 2016 elections, because we
     # dont have the data
     if state_county_ids == 0:
         continue

     if date <= after_2016_elections:
         left_count_n = left_count_2016.get(state_county_ids,0 )
         right_count_n = left_count_2016.get(state_county_ids, 0)

         lead_display = lead_2016.get(state_county_ids, 0)
         lead_i = 0 if lead_display == "dem" else 1
         prev_party_display = "uknown"
         prev_lead_i = -1


     else:
         left_count_n = left_count.get(state_county_ids,0 )
         right_count_n = right_count.get(state_county_ids, 0)

         lead_display = lead.get(state_county_ids, 0)
         lead_i = 0 if lead_display == "dem" else 1

         prev_party_display = lead_2016.get(state_county_ids, 0)
         prev_lead_i = 0 if prev_party_display == "dem" else 1


     # TODO: age kdaj hrani null
     age = row["victim"] if row["victim"] != float('nan') else 0
     gender = gender_idx[row["gender"]]
     race = race_idx[row["race"]]
     death = cause_of_death_idx[row["death"]]
     charges = charges_idx[row["charges"]]
     illness = mental_illness_idx.get(row["mental_illness"], mental_illness_idx["unknown"])
     weapon = weapon_idx[row["weapon"]]
     weapon_type = weapon_type_idx[row["weapon_type"]]
     threat = threat_idx[row["threat"]]
     fleeing = fleeing_idx_func(row["fleeing"])


     values_display = (row["county"], row["date"], row["victim"], row["gender"],
                       row["race"], row["death"], row["charges"], row["mental_illness"],
                       row["weapon"], row["weapon_type"], row["threat"],
                       row["fleeing"], left_count_n, right_count_n, prev_party_display, lead_display)
     processed_data_display.append(values_display)

     values = (state_county_ids, date, age, gender, race, death, charges, illness, weapon, weapon_type, threat, fleeing, left_count_n, right_count_n, prev_lead_i, lead_i)
     processed_data.append(values)

In [630]:
header = ["county", "date", "age", "gender", "race", "death", "charges", "illness",
          "weapon", "weapon_type", "threat", "fleeing", "leftvote", "rightvote", "prev_party", "winner"]

data_displayed = pd.DataFrame.from_records(processed_data_display)
data_displayed.columns = header
data_displayed.to_csv("./test_display.csv")

data = pd.DataFrame.from_records(processed_data)
data.columns = header
data.to_csv("./test.csv")

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,kentucky,10/28/2020,44,male,unknown race,gunshot,no known charges,no,allegedly armed,undetermined,undetermined,,1791,5487,dem
1,indiana,10/24/2020,33,male,unknown race,gunshot,no known charges,no,allegedly armed,knife,attack,not fleeing,5433,19441,dem
2,kansas,10/23/2020,32,male,unknown race,gunshot,no known charges,no,allegedly armed,gun,other,car,3869,12789,dem
3,west virginia,10/22/2020,63,male,unknown race,gunshot,no known charges,no,allegedly armed,undetermined,other,,1451,4199,dem
4,arizona,10/22/2020,30,male,hispanic,gunshot,no known charges,no,allegedly armed,gun,attack,car,65355,88597,dem
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8286,colorado,1/1/2013,21,male,hispanic,gunshot,no known charges,no,allegedly armed,gun,,,27945,27945,rep
8287,california,1/1/2013,26,male,hispanic,gunshot,no known charges,no,allegedly armed,gun,,,64980,64980,dem
8288,new mexico,1/1/2013,49,male,hispanic,gunshot,no known charges,no,allegedly armed,blunt weapon,,,12826,12826,dem
8289,wisconsin,1/1/2013,21,male,white,gunshot,no known charges,drug or alcohol use,allegedly armed,gun,,,11342,11342,rep


In [622]:
data_displayed['test'].value_counts()

In [477]:
id = get_county_code("ohio", "hamilton")
left_count.get(id, 0)

238101