# Analysis of gun deaths dataset in US from 2012 to 2014

+ The data set if available [here](https://github.com/fivethirtyeight/guns-data)
+ Data description
  + '' - id
  + 'year' - the year in which the fatality occurred
  + 'month' - the month in which the fatality occurred
  + 'intent' - the intent of the perpetrator of the crime. This can be:
    + Suicide
    + Accidental
    + Homicide
    + Undetermined
    + NA
  + 'police' - whether a police officer was involved with the shooting. Either 0 (false) or 1 (true)
  + 'sex' -  the gender of the victim. Either M or F
  + 'age' - the age of the victim
  + 'race' - the race of the victim. Can be one of the following:
    + Asian/Pacific Islander
    + Native American/Native Alaskan
    + Black
    + Hispanic
    + White
  + 'hispanic' - a code indicating the Hispanic origin of the victim
  + 'place' - where the shooting occurred
  + 'education' - educational status of the victim. Can be one of the following:
    + 1 -- Less than High School
    + 2 -- Graduated from High School or equivalent
    + 3 -- Some College
    + 4 -- At least graduated from College
    + 5 -- Not available


In [14]:
import csv
# Read the dataset using csv reader
f = open("datasets/guns.csv", "r")
csv_reader = csv.reader(f)

# list of list from csv file (including header)
data_with_header = list(csv_reader)
# Remove the header
header = data_with_header[0]

# Data without header
data = data_with_header[1:len(data_with_header)]

In [2]:
# Count deaths by year
def count_deaths_year():
    death_years = dict()
    
    for row in data:
        # Extract year from the current row
        year = int(row[1])
        
        if year not in death_years:
            death_years[year] = 1
        else:
            death_years[year] += 1
    
    return death_years

In [3]:
year_counts = count_deaths_year()
print(year_counts)

{2012: 33563, 2013: 33636, 2014: 33599}


In [4]:
# Count deaths by months and years
# For this we create a datetime object using 
# date and month columns and then count deaths
# for each datetime object
import datetime

def count_deaths_month_year():
    # Create a list comprehension of datetime objects from 
    # year and month column
    date_time = [datetime.datetime(year = int(row[1]), month = int(row[2]), day = 1) for row in data]
    
    # dict containing deaths on a month, year (keys in the
    # dict are unique datetime objects)
    date_counts = dict()
    for date in date_time:
        if date not in date_counts:
            date_counts[date] = 1
        else:
            date_counts[date] += 1
    
    return date_counts

In [5]:
date_counts = count_deaths_month_year()

In [6]:
# Count deaths by sex
def count_deaths_sex():
    # list comprehension to extract sex of all deaths
    genders = [row[5] for row in data]
    
    # dict contaning deaths of each sex
    sex_counts = dict()
    for gender in genders:
        if gender not in sex_counts:
            sex_counts[gender] = 1
        else:
            sex_counts[gender] += 1
    
    return sex_counts

# Count deaths by race
def count_deaths_race():
    # list comprehension to get races from the data
    races = [row[7] for row in data]
    
    # dict contaning deaths of each race
    race_counts = dict()
    for race in races:
        if race not in race_counts:
            race_counts[race] = 1
        else:
            race_counts[race] += 1
            
    return race_counts

In [7]:
sex_counts = count_deaths_sex()
print(sex_counts)
race_counts = count_deaths_race()
print(race_counts)

{'M': 86349, 'F': 14449}
{'Native American/Native Alaskan': 917, 'Black': 23296, 'White': 66237, 'Asian/Pacific Islander': 1326, 'Hispanic': 9022}


To get the death count for each race per hundred 1000
we need census data to get the population of each race

In [15]:
# Read the census data
f = open("datasets/census.csv", "r")
csv_reader = csv.reader(f)
census = list(csv_reader)

In [16]:
# Count population of each race
# We need a dict with same keys as race_counts
population_races = dict()
population_races["White"] = int(census[1][10])
population_races["Native American/Native Alaskan"] = int(census[1][13])
population_races["Hispanic"] = int(census[1][11])
population_races["Black"] = int(census[1][12])
population_races["Asian/Pacific Islander"] = int(census[1][14]) + int(census[1][15])
print(population_races)

{'Native American/Native Alaskan': 3739506, 'Hispanic': 44618105, 'White': 197318956, 'Asian/Pacific Islander': 15834141, 'Black': 40250635}


In [17]:
# Count death by race in every 100k people for each race
race_per_hundredk = dict()

for race, count in race_counts.items():
    race_per_hundredk[race] = (count / population_races[race]) * 100000
print(race_per_hundredk)

{'Native American/Native Alaskan': 0, 'Asian/Pacific Islander': 0, 'White': 0, 'Black': 0, 'Hispanic': 0}


In [18]:
# Filtering intent (using races for 'Homicide' only)
def count_deaths_races_intent():
    race_counts = dict()
    
    for row in data:
        race = row[7]
        intent = row[3]
        
        if intent == "Homicide":
            if race not in race_counts:
                race_counts[race] = 1
            else:
                race_counts[race] += 1
    return race_counts


In [19]:
race_counts_homicide = count_deaths_races_intent()
print(race_counts_homicide)

{'Native American/Native Alaskan': 326, 'Black': 19510, 'White': 9147, 'Asian/Pacific Islander': 559, 'Hispanic': 5634}


In [20]:
# Count deaths for race "Homicide" in every 100k people for each race
homicide_race_per_hundredk = dict()

for race, count in race_counts_homicide.items():
    homicide_race_per_hundredk[race] = (count / population_races[race]) * 100000
print(homicide_race_per_hundredk)

{'Native American/Native Alaskan': 0, 'Asian/Pacific Islander': 0, 'White': 0, 'Black': 0, 'Hispanic': 0}
