In [1]:
import pandas as pd
import os #os module allows interaction with the operating system, like reading from or writing to the file system.



`PATH_IN = './data/'` sets the variable `PATH_IN` to the string `'./data/'`, the path of the directory where the data is stored.

`fname = os.path.join(PATH_IN, 'interactive_data.csv')` uses `os.path.join` to combine the directory path in `PATH_IN` with the filename `'interactive_data.csv'`. The resulting complete file path is stored in the variable `fname`.

In [2]:
# Load the gun-death table; the figures are averages from 2012 to 2014.
PATH_IN = './data/'
fname = os.path.join(PATH_IN, 'interactive_data.csv')
# index_col=0 makes the first CSV column the row index instead of a data column.
df = pd.read_csv(filepath_or_buffer=fname, index_col=0)
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,Intent,Gender,Age,Race,Deaths,Population,Rate
1,None selected,None selected,None selected,None selected,33599,316299978,10.6
2,None selected,None selected,None selected,White,22079,197369634,11.2
3,None selected,None selected,None selected,Black,7765,38896382,20.0
4,None selected,None selected,None selected,Hispanic,3007,54049078,5.6
5,None selected,None selected,None selected,Asian/Pacific Islander,442,16315561,2.7


In [3]:
# Distinct values of the 'Intent' column, in order of first appearance.
pd.unique(df['Intent'])

array(['None selected', 'Suicide', 'Homicide', 'Accident', 'Unknown'],
      dtype=object)

In [4]:
# Distinct values of the 'Age' column, in order of first appearance.
# NOTE(review): the recorded output lists a '5' entry next to the age bands;
# confirm whether that is a data artifact before relying on age breakdowns.
pd.unique(df['Age'])

array(['None selected', 'Under 15', '15 - 34', '35 - 64', '65+', '5'],
      dtype=object)

In [5]:
# Distinct values of the 'Race' column, in order of first appearance.
pd.unique(df['Race'])

array(['None selected', 'White', 'Black', 'Hispanic',
       'Asian/Pacific Islander', 'Other'], dtype=object)

In [6]:
# Distinct values of the 'Gender' column, in order of first appearance.
pd.unique(df['Gender'])

array(['None selected', 'Female', 'Male'], dtype=object)

Note that there are other ways to approach this analysis; the following is one possible correct solution.
The file 'interactive_data.csv' contains aggregations across different categorical filters.
Specifically, 'None selected' corresponds to the case when a filter is not applied to a column.
Thus, such cases correspond to all possible values under that column.
For example, if all columns have the entry 'None selected', then this corresponds to an aggregation across all entries.
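Thus, the first pre-processing step is to remove rows where at least one column has the value 'None selected'. The rows that remain each describe one specific combination of the filters, so their 'Deaths' values can be summed without also counting the aggregated totals.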

In [7]:
# Element-wise flag: True wherever a cell holds the 'None selected' placeholder.
is_placeholder = df == 'None selected'
# Per-row flag: True when at least one cell in the row is a placeholder.
row_has_placeholder = is_placeholder.any(axis=1)
# Keep only the rows without any placeholder, i.e. the non-aggregated rows.
df_filtered = df.loc[~row_has_placeholder]

df == 'None selected' will compare every element in the DataFrame df to the string 'None selected' and return a DataFrame of the same shape as df but with True and False indicating where the condition is met.

The any() function reduces along the specified axis. With axis=1 it looks across the columns of each row and returns a boolean Series with one entry per row: True if any element in that row is 'None selected', and False otherwise.

~ is a bitwise NOT operator, which inverts the boolean values. So if the .any(axis=1) returned True for a row (meaning 'None selected' was found in that row), the ~ will change it to False, and vice versa.

The .loc[] indexer is used to select only the rows where the inverted condition is True, so the result contains only rows in which no element is 'None selected'. When a single boolean Series is passed to .loc[], it is interpreted as a row selector that keeps the rows where the Series is True. The general form is df.loc[row_selector, column_selector].

#### Nearly two-thirds of gun deaths are suicides.

In [8]:
# Total deaths across the non-aggregated rows.
all_deaths = df_filtered['Deaths'].sum()
# Deaths on the rows whose intent is suicide.
suicides = df_filtered.loc[df_filtered['Intent'].eq('Suicide'), 'Deaths'].sum()

print(f'{suicides/all_deaths*100}% of gun deaths are suicides')

62.68194671826165% of gun deaths are suicides


#### More than 85 percent of suicide victims are male.

In [9]:
# Suicide deaths among male victims; `suicides` comes from the previous cell.
male_suicides = df_filtered.loc[
    df_filtered['Intent'].eq('Suicide') & df_filtered['Gender'].eq('Male'),
    'Deaths',
].sum()

print(f'{male_suicides/suicides*100}% of suicide victims are male')

86.24275809668535% of suicide victims are male


#### Around a third of all gun deaths are homicides.

In [10]:
# Deaths on the rows whose intent is homicide; `all_deaths` comes from an earlier cell.
homicides = df_filtered.loc[df_filtered['Intent'].eq('Homicide'), 'Deaths'].sum()
print(f'{homicides/all_deaths*100}% of all gun deaths are homicides')

34.906980205387704% of all gun deaths are homicides


#### Around two-thirds of homicide victims who are males in the age-group of 15--34 are black.

In [11]:
# Denominator: male homicide victims aged 15 - 34, across every race.
young_male_homicides = df_filtered.loc[
    df_filtered['Intent'].eq('Homicide')
    & df_filtered['Gender'].eq('Male')
    & df_filtered['Age'].eq('15 - 34'),
    'Deaths',
].sum()

# Numerator: the same group narrowed to rows whose race is 'Black'.
young_black_male_homicides = df_filtered.loc[
    df_filtered['Intent'].eq('Homicide')
    & df_filtered['Gender'].eq('Male')
    & df_filtered['Age'].eq('15 - 34')
    & df_filtered['Race'].eq('Black'),
    'Deaths',
].sum()

print(f'''{young_black_male_homicides/young_male_homicides*100}% of homicide victims who are males in the age-group 15-34 are black''')

66.12482748044778% of homicide victims who are males in the age-group 15-34 are black


#### Women constitute only 15 percent of the total homicide victims.

In [12]:
# Homicide deaths among female victims; `homicides` comes from an earlier cell.
women_homicides = df_filtered.loc[
    df_filtered['Intent'].eq('Homicide') & df_filtered['Gender'].eq('Female'),
    'Deaths',
].sum()
print(f'{women_homicides/homicides*100}% of the total homicide victims are women')

15.289502856655583% of the total homicide victims are women
