In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import gzip

In [2]:
# Load the cleaned incident data that carries per-incident gender counts
# for victims and suspects.
# The gzip stream is opened in a `with` block so the underlying file handle
# is closed as soon as pandas has finished parsing it.
with gzip.open('data_cleaned/cleanedBigGun_gender.csv.gz') as gz_handle:
    df = pd.read_csv(gz_handle)
df.head()

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,sum n_killed&n_injured,latitude,longitude,...,participant_name,participant_status,participant_type,# of Victims,# of Suspects,# of Male Victims,# of Female Victims,# of Male Suspects,# of Female Suspects,year
0,854799,2014-01-01,Florida,Milton,,0,0,0,30.6884,-87.048,...,0::Silvano Zaragoza-Ambriz||1::Kyle James Corbi,"0::Unharmed, Arrested||1::Unharmed, Arrested",0::Subject-Suspect||1::Subject-Suspect,0,2,,,2.0,0.0,2014
1,98134,2014-01-01,Louisiana,Marrero,6100 block of Ray St,1,0,1,29.8862,-90.1087,...,0::Sgt. Joseph Anderson||1::Darwin Bethune,0::Killed||1::Unharmed,0::Victim||1::Subject-Suspect,1,1,1.0,0.0,1.0,0.0,2014
2,92563,2014-01-01,Mississippi,Bogue Chitto,1347 Brumfield Rd SW,1,0,1,31.3762,-90.5601,...,0::Karlianna Celeste Brumfield,0::Killed||1::Unharmed,0::Victim||1::Subject-Suspect,1,1,0.0,1.0,1.0,0.0,2014
3,97976,2014-01-01,Hawaii,Lihue,,1,0,1,21.9986,-159.356,...,0::Amby Cruz||1::Giovani Corpuz,0::Killed||1::Unharmed,0::Victim||1::Subject-Suspect,1,1,1.0,0.0,1.0,0.0,2014
4,92117,2014-01-01,Kentucky,Cynthiana,,0,1,1,38.4333,-84.3542,...,0::Dustin Mullins||1::Timothy Turner,"0::Injured||1::Unharmed, Arrested",0::Victim||1::Subject-Suspect,1,1,1.0,0.0,1.0,0.0,2014


In [3]:
# List every column name in the loaded frame (head() above elides the middle ones).
df.columns

Index(['incident_id', 'date', 'state', 'city_or_county', 'address', 'n_killed',
       'n_injured', 'sum n_killed&n_injured', 'latitude', 'longitude',
       'n_guns_involved', 'STATE', 'gun_type', 'incident_characteristics',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_status', 'participant_type',
       '# of Victims', '# of Suspects', '# of Male Victims',
       '# of Female Victims', '# of Male Suspects', '# of Female Suspects',
       'year'],
      dtype='object')

In [4]:
# Yearly totals of the four gender/role count columns.
# Selecting the count columns *before* aggregating means the group-by runs
# once (instead of once per column) and never attempts to sum the unrelated
# columns of the incident table.
gender_count_columns = [
    '# of Male Victims',
    '# of Female Victims',
    '# of Male Suspects',
    '# of Female Suspects',
]
data = (
    df.groupby('year')[gender_count_columns]
    .sum()
    .reset_index()                      # turn the 'year' index back into a column
    .rename(columns={'year': 'Year'})   # display-friendly header
)
data

Unnamed: 0,Year,# of Male Victims,# of Female Victims,# of Male Suspects,# of Female Suspects
0,2014,1953.0,630.0,3140.0,235.0
1,2015,4950.0,1720.0,12230.0,1099.0
2,2016,10373.0,3000.0,19874.0,1678.0
3,2017,15198.0,3633.0,26085.0,2241.0
4,2018,3111.0,819.0,6014.0,547.0


In [5]:
# The summed counts come back as floats; cast the whole table
# (Year included) to int64 in one DataFrame-level call.
data = data.astype('int64')

In [6]:
# Check the per-column dtypes after the int64 cast.
data.dtypes

Year                    int64
# of Male Victims       int64
# of Female Victims     int64
# of Male Suspects      int64
# of Female Suspects    int64
dtype: object

In [7]:
# Re-display the yearly totals table now that the columns have been cast.
data

Unnamed: 0,Year,# of Male Victims,# of Female Victims,# of Male Suspects,# of Female Suspects
0,2014,1953,630,3140,235
1,2015,4950,1720,12230,1099
2,2016,10373,3000,19874,1678
3,2017,15198,3633,26085,2241
4,2018,3111,819,6014,547


In [8]:
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.transform import dodge
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.layouts import layout

# Grouped bar chart (one bar per gender/role for each year) plus a data table
# of the same numbers, written to a standalone HTML page.
output_file('genderBarCharts.html')

width_of_each_bar = .2

# Extra room added above the tallest bar on the y-axis
# (presumably so the horizontal legend does not overlap the bars).
y_axis_headroom = 3000

# One entry per bar series: (column, horizontal offset inside the year group, colour).
bar_series = [
    ('# of Male Victims', -0.3, 'red'),
    ('# of Female Victims', -0.1, 'pink'),
    ('# of Male Suspects', 0.1, 'blue'),
    ('# of Female Suspects', 0.3, 'lightskyblue'),
]
count_columns = [column for column, _, _ in bar_series]

# Only Year is turned into strings: it supplies the categorical x-axis
# factors. The counts stay numeric so that bar heights, the hover number
# format and the axis maximum all operate on real numbers.
# Working on a copy leaves `data` and the raw `df` frame untouched, so this
# cell (and the ones before it) can be re-run in any order.
plot_frame = data.assign(Year=data['Year'].astype(str))
plot_columns = plot_frame.to_dict(orient='list')
years = plot_columns['Year']

data_max = int(plot_frame[count_columns].values.max())

source = ColumnDataSource(data=plot_columns)

p = figure(x_range=years, y_range=(0, data_max + y_axis_headroom),
           plot_height=600, plot_width=650, title="Gender Count of Victims and Suspects by Year",
           toolbar_location=None)

# Draw each series and attach a hover tool restricted to that series' bars,
# so the tooltip only reports the bar under the cursor.
for column, offset, color in bar_series:
    bars = p.vbar(x=dodge('Year', offset, range=p.x_range), top=column,
                  width=width_of_each_bar, source=source,
                  color=color, legend=value(column))
    p.add_tools(HoverTool(
        tooltips=[
            ("Year", "@Year"),
            # "@{<column>}" reads the field; the trailing "{0,0[.]00}" is the number format.
            (column, "@{" + column + "}{0,0[.]00}"),
        ],
        renderers=[bars],
    ))

p.xgrid.grid_line_color = None
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

# Table with the same columns as the chart, Year first.
columns = [TableColumn(field=name, title=name) for name in ['Year'] + count_columns]
p_data = DataTable(source=source, columns=columns, width=600, height=600, selectable = True)

# show the results
show(layout([p, p_data]))

  elif np.issubdtype(type(obj), np.float):
