In [1]:
import pandas as pd 
from os import path
import json
from math import isnan
from dateutil import parser

In [2]:
# Lookup tables keyed by upper-case two-letter postal code (50 states plus DC).
# All three dicts share the same key set so a code valid in one is valid in the others.

# Postal code -> full state name; the full name is used as the key of the generated JSON.
STATE_NAME = {'AL': 'Alabama', 'AK': "Alaska", 'AZ': 'Arizona', 'AR': 'Arkansas', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware', 'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas',  'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming'}
# Postal code -> state area, used as the divisor for 'count_per_area'.
# NOTE(review): values look like square miles - TODO confirm the unit and source.
# DC is the only non-integer entry (68.34).
STATE_AREAS = {'AL': 52419, 'AK': 665400, 'AZ': 113998, 'AR': 53179, 'CA': 163696, 'CO': 104185, 'CT': 5543, 'DE': 1982, 'DC': 68.34, 'FL': 65758, 'GA': 59425, 'HI': 4028, 'ID': 83642, 'IL': 57915, 'IN': 36418, 'IA': 55857, 'KS': 82278, 'KY': 40409, 'LA': 52378, 'ME': 35385, 'MD': 12407, 'MA': 10565, 'MI': 96716, 'MN': 86943, 'MS': 48430, 'MO': 69715, 'MT': 147040, 'NE': 77348, 'NV': 110567, 'NH': 9349, 'NJ': 8723, 'NM': 121697, 'NY': 54555, 'NC': 53819, 'ND': 70704, 'OH': 44825, 'OK': 69899, 'OR': 98466, 'PA': 46055, 'RI': 1214, 'SC': 32020, 'SD': 77116, 'TN': 42180, 'TX': 268597,  'UT': 84899, 'VT': 9616, 'VA': 42775, 'WA': 71300, 'WV': 24038, 'WI': 65498, 'WY': 97914 }
# Postal code -> resident population, used as the divisor for 'count_per_population'.
# NOTE(review): the census year / source of these figures is not recorded here - TODO confirm.
STATE_POPULATION = {'AL': 4843737, 'AK': 737075, 'AZ': 6732873, 'AR': 2968759, 'CA': 38586706, 'CO': 5352637, 'CT': 3595697, 'DE': 933131, 'DC': 663603, 'FL': 19853880, 'GA': 10071204, 'HI': 1415335, 'ID': 1632248, 'IL': 12885092, 'IN': 6596019, 'IA': 3110643, 'KS': 2901861, 'KY': 4416992, 'LA': 4645938, 'ME': 1331217, 'MD': 5960064, 'MA': 6764864, 'MI': 9932033, 'MN': 5452665, 'MS': 2991892, 'MO': 6059130, 'MT': 1022657, 'NE': 1879955, 'NV': 2818935, 'NH': 1334257, 'NJ': 8867277, 'NM': 2090236, 'NY': 19653431, 'NC': 9937295, 'ND': 738736, 'OH': 11606573, 'OK': 3879187, 'OR': 3965447, 'PA': 12792392, 'RI': 1056511, 'SC': 4826858, 'SD': 849670, 'TN': 6544617, 'TX': 26963092,  'UT': 2938327, 'VT': 625693, 'VA': 8315430, 'WA': 7057531, 'WV': 1850569, 'WI': 5753199, 'WY': 583159}

In [3]:
def read_csv(loc='', filename=''):
    """Load a CSV file into a DataFrame.

    The file at ``loc``/``filename`` is decoded as ISO-8859-1 (Latin-1);
    every other pandas parsing option is left at its default.
    """
    return pd.read_csv(path.join(loc, filename), encoding='ISO-8859-1')

In [4]:
def read_json(loc='', filename=''):
    """Parse the JSON document stored at ``loc``/``filename`` and return it.

    The file is opened in text mode with the platform default encoding and
    closed again before the parsed object is handed back.
    """
    with open(path.join(loc, filename), 'r') as handle:
        return json.load(handle)

In [5]:
def write_json(data, loc='', filename='', indent=2):
    """Serialise ``data`` as JSON into ``loc``/``filename``.

    An existing file is overwritten. ``indent`` is forwarded to ``json.dump``
    and controls pretty-printing (2 spaces by default). Returns nothing.
    """
    target = path.join(loc, filename)
    with open(target, 'w') as sink:
        json.dump(data, sink, indent=indent)

In [6]:
def get_state_counts(data):
    """Aggregate per-state statistics, one input row counting as one victim.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``state`` (two-letter code, any case),
        ``gender``, ``ageGroup`` and ``city``.

    Returns
    -------
    dict
        Maps the full state name to a dict with the keys:

        * ``code`` - upper-cased state code,
        * ``count`` - number of rows for the state,
        * ``count_per_area`` - ``count * 100000 / STATE_AREAS[code]``,
        * ``count_per_population`` - ``count * 20000000 / STATE_POPULATION[code]``,
        * ``genderMale`` / ``genderFemale`` / ``genderUnknown``,
        * ``ageGroup1`` / ``ageGroup2`` / ``ageGroup3`` / ``ageGroupUnknown``,
        * ``cities`` - stripped city name -> number of rows.

    Raises
    ------
    KeyError
        If a state code is missing from one of the lookup tables.
    AttributeError
        If a ``state`` or ``city`` cell is not a string (e.g. missing).
    """
    # Scale factors of the two normalised rates; values kept from the original analysis.
    area_scale = 100000
    population_scale = 20000000
    # ageGroup is matched on its string form, i.e. float-typed 1.0 / 2.0 / 3.0.
    age_group_keys = {'1.0': 'ageGroup1', '2.0': 'ageGroup2', '3.0': 'ageGroup3'}

    state_counts = dict()

    # Walk the columns positionally so the result does not depend on the
    # frame's index labels (a filtered or re-indexed frame works too).
    rows = zip(data['state'], data['gender'], data['ageGroup'], data['city'])
    for raw_code, gender, age_group, raw_city in rows:
        code = raw_code.upper()
        state_name = get_state_name(code)
        city = raw_city.strip()

        entry = state_counts.get(state_name)
        if entry is None:
            entry = state_counts[state_name] = {
                'code': code,
                'count': 0,
                # Placeholders keep the key order of the JSON output; the real
                # values are derived from 'count' once all rows are tallied.
                'count_per_area': 0,
                'count_per_population': 0,
                'genderMale': 0,
                'genderFemale': 0,
                'genderUnknown': 0,
                'ageGroup1': 0,
                'ageGroup2': 0,
                'ageGroup3': 0,
                'ageGroupUnknown': 0,
                'cities': {},
            }
        entry['count'] += 1

        # Missing values (NaN) and any non-string cell count as unknown.
        gender_code = gender.lower() if isinstance(gender, str) else None
        if gender_code == 'm':
            entry['genderMale'] += 1
        elif gender_code == 'f':
            entry['genderFemale'] += 1
        else:
            entry['genderUnknown'] += 1

        entry[age_group_keys.get(str(age_group), 'ageGroupUnknown')] += 1

        cities = entry['cities']
        cities[city] = cities.get(city, 0) + 1

    for entry in state_counts.values():
        code = entry['code']
        entry['count_per_area'] = entry['count'] * area_scale / STATE_AREAS[code]
        entry['count_per_population'] = (
            entry['count'] * population_scale / STATE_POPULATION[code]
        )
    return state_counts

In [7]:
def get_city_counts(data):
    """Aggregate per-city statistics, grouped by state.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``state`` (two-letter code, any case),
        ``city``, ``gender``, ``ageGroup``, ``lat`` and ``lng``.

    Returns
    -------
    dict
        ``{state name: {city: stats}}`` where ``stats`` holds ``count``,
        ``lat`` / ``lon`` (string form of the coordinates of the first row
        seen for that city), the ``ageGroup1`` / ``ageGroup2`` /
        ``ageGroup3`` / ``ageGroupUnknown`` tallies and the ``genderMale`` /
        ``genderFemale`` / ``genderUnknown`` tallies.

    Raises
    ------
    KeyError
        If a state code is not known to ``get_state_name``.
    AttributeError
        If a ``state`` or ``city`` cell is not a string (e.g. missing).
    """
    # ageGroup is matched on its string form, i.e. float-typed 1.0 / 2.0 / 3.0.
    age_group_keys = {'1.0': 'ageGroup1', '2.0': 'ageGroup2', '3.0': 'ageGroup3'}

    city_counts = dict()

    # Walk the columns positionally so the result does not depend on the
    # frame's index labels (a filtered or re-indexed frame works too).
    rows = zip(data['state'], data['gender'], data['ageGroup'],
               data['lat'], data['lng'], data['city'])
    for raw_code, gender, age_group, lat, lng, raw_city in rows:
        state_name = get_state_name(raw_code.upper())
        city = raw_city.strip()

        state = city_counts.setdefault(state_name, {})
        entry = state.get(city)
        if entry is None:
            entry = state[city] = {
                'count': 0,
                'lat': str(lat),
                'lon': str(lng),
                'ageGroup1': 0,
                'ageGroup2': 0,
                'ageGroup3': 0,
                'ageGroupUnknown': 0,
                'genderMale': 0,
                'genderFemale': 0,
                'genderUnknown': 0,
            }
        entry['count'] += 1

        # Missing values (NaN) and any non-string cell count as unknown.
        gender_code = gender.lower() if isinstance(gender, str) else None
        if gender_code == 'm':
            entry['genderMale'] += 1
        elif gender_code == 'f':
            entry['genderFemale'] += 1
        else:
            entry['genderUnknown'] += 1

        entry[age_group_keys.get(str(age_group), 'ageGroupUnknown')] += 1

    return city_counts

In [8]:
def get_city_victims(data):
    """Group individual victim records by state and city.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``state``, ``city``, ``victimID``, ``name``,
        ``age``, ``url``, ``ageGroup``, ``date`` and ``gender``.

    Returns
    -------
    dict
        ``{state name: {city: {'count': int, 'victims': [record, ...]}}}``.
        Each record is a dict of strings with the keys ``_id``, ``url``,
        ``name``, ``age``, ``ageGroup``, ``date`` (formatted ``%d %b %Y``),
        ``gender``, ``city`` and ``state``. Missing name, age, ageGroup or
        gender values are stored as ``'Unknown'``.

    Raises
    ------
    KeyError
        If a state code is not known to ``get_state_name``.
    TypeError
        If ``age`` or ``ageGroup`` holds a non-numeric value.
    """
    city_victims = dict()

    # Walk the columns positionally so the result does not depend on the
    # frame's index labels (a filtered or re-indexed frame works too).
    rows = zip(data['state'], data['city'], data['victimID'], data['name'],
               data['age'], data['url'], data['ageGroup'], data['date'],
               data['gender'])
    for raw_code, raw_city, victim_id, name, age, url, age_group, raw_date, gender in rows:
        state_name = get_state_name(raw_code.upper())
        city = raw_city.strip()
        date = parser.parse(raw_date)

        state = city_victims.setdefault(state_name, {})
        city_entry = state.setdefault(city, {'count': 0, 'victims': []})
        city_entry['count'] += 1

        city_entry['victims'].append({
            '_id': str(victim_id),
            'url': url,
            # A float in a text column is pandas' missing-value marker (NaN).
            'name': name if not isinstance(name, float) else 'Unknown',
            'age': str(int(age)) if not isnan(age) else 'Unknown',
            'ageGroup': str(int(age_group)) if not isnan(age_group) else 'Unknown',
            'date': date.strftime('%d %b %Y'),
            'gender': str(gender) if not isinstance(gender, float) else 'Unknown',
            'city': city,
            'state': state_name
        })

    return city_victims

In [9]:
def get_victims_list(data):
    """Build a flat ``victimID -> record`` lookup and print a gender/age tally.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``state``, ``city``, ``victimID``, ``name``,
        ``age``, ``url``, ``ageGroup``, ``date`` and ``gender``.

    Returns
    -------
    dict
        Maps ``str(victimID)`` to a dict of strings with the keys ``url``,
        ``name``, ``age``, ``ageGroup``, ``date`` (formatted ``%d %b %Y``),
        ``state``, ``city`` and ``gender``. Missing name, age, ageGroup or
        gender values are stored as ``'Unknown'``. Rows sharing a victimID
        overwrite each other; the last one wins.

    Side effects
    ------------
    Prints a diagnostic dict with, for ``'M'`` and ``'F'``, the total number
    of rows and the number of rows per age group (``'1.0'``, ``'2.0'``,
    ``'3.0'``). Gender is matched case-insensitively, in line with the state
    and city aggregations.

    Raises
    ------
    KeyError
        If a state code is not known to ``get_state_name``.
    TypeError
        If ``age`` or ``ageGroup`` holds a non-numeric value.
    """
    victims = {}

    counts = {
        gender_key: {'1.0': 0, '2.0': 0, '3.0': 0, 'total': 0}
        for gender_key in ('M', 'F')
    }

    # Walk the columns positionally so the result does not depend on the
    # frame's index labels (a filtered or re-indexed frame works too).
    rows = zip(data['state'], data['city'], data['victimID'], data['name'],
               data['age'], data['url'], data['ageGroup'], data['date'],
               data['gender'])
    for raw_code, raw_city, victim_id, name, age, url, age_group, raw_date, gender in rows:
        state_name = get_state_name(raw_code.upper())
        city = raw_city.strip()
        date = parser.parse(raw_date)

        victims[str(victim_id)] = {
            'url': url,
            # A float in a text column is pandas' missing-value marker (NaN).
            'name': name if not isinstance(name, float) else 'Unknown',
            'age': str(int(age)) if not isnan(age) else 'Unknown',
            'ageGroup': str(int(age_group)) if not isnan(age_group) else 'Unknown',
            'date': date.strftime('%d %b %Y'),
            'state': state_name,
            'city': city,
            'gender': str(gender) if not isinstance(gender, float) else 'Unknown'
        }

        # One normalised key is used for every lookup into the tally.
        gender_key = str(gender).upper()
        if gender_key in counts:
            tally = counts[gender_key]
            tally['total'] += 1
            age_group_key = str(age_group)
            # 'total' shares the dict with the age-group buckets; never bump it twice.
            if age_group_key != 'total' and age_group_key in tally:
                tally[age_group_key] += 1

    print(counts)

    return victims

In [10]:
def get_state_name(name):
    """Translate a state code into the full state name.

    ``name`` must match a key of ``STATE_NAME`` exactly (the keys are
    upper-case two-letter codes); any other value raises ``KeyError``.
    """
    full_name = STATE_NAME[name]
    return full_name
def get_state_area(name):
    """Return the area recorded for a state code, truncated to an integer.

    ``name`` must match a key of ``STATE_AREAS`` exactly (the keys are
    upper-case two-letter codes); any other value raises ``KeyError``.
    Fractional areas are truncated by ``int`` (e.g. 68.34 becomes 68).
    """
    # The table is called STATE_AREAS; the former STATE_AREA lookup raised NameError.
    area = STATE_AREAS[name]
    return int(area)

# --- Driver: load the incident table, build the four aggregates, write them as JSON ---

# Path is relative to the notebook's working directory.
# NOTE(review): this calls pd.read_csv directly, so pandas' default encoding is used
# rather than the ISO-8859-1 applied by the read_csv() helper - confirm this is intended.
slate_gun_deaths = pd.read_csv('SlateGunDeaths.csv')
# state_json = read_json(loc='data', filename='us-states.topojson')

# Sanity check: (rows, columns) of the loaded table.
print(slate_gun_deaths.shape)
# print(state_json['objects']['collection']['geometries'][:5])
# All aggregates are computed before any file is written, so a failure in one of
# the get_* calls leaves no partial set of output files behind.
state_counts = get_state_counts(slate_gun_deaths)
city_counts = get_city_counts(slate_gun_deaths)
city_victims = get_city_victims(slate_gun_deaths)
# get_victims_list also prints its gender / age-group tally as a side effect.
victims = get_victims_list(slate_gun_deaths)
# loc is left at its default (''), so the files land in the current working directory.
write_json(state_counts, filename='stateCounts.json')
write_json(city_counts, filename='cityCounts.json')
write_json(city_victims, filename='cityVictims.json')
write_json(victims, filename='victimsList.json')


(12070, 11)
{'M': {'1.0': 155, '2.0': 503, '3.0': 9310, 'total': 10153}, 'F': {'1.0': 77, '2.0': 69, '3.0': 1676, 'total': 1850}}
