# Data for Tableau Analysis

## This script contains the following points:
### 1. Import Libraries and Data
### 2. Cleaning and Deriving Data
### 3. Export Data

## 1. Import Libraries and Data

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import os

In [2]:
# Load the prepared mass-shootings CSV from the project folder
path = r'C:\Users\nguye\OneDrive\05-2023 US Mass Shooting Data Analysis'
source_csv = os.path.join(path, '02 Data', 'Prepared Data', 'Mass_Shootings_GA.csv')
df = pd.read_csv(source_csv)

In [3]:
# Preview the first rows of the loaded data
df.head()

Unnamed: 0.1,Unnamed: 0,Incident ID,Incident Date,State,City Or County,Victims Injured,Victims Killed,Total Victims,Suspects Injured,Suspects Killed,Suspects Arrested,Total Suspects,Suspect Category
0,0,2589765,"May 6, 2023",Ohio,Columbus,3,1,4,1,0,0,1,Single suspect
1,1,2589804,"May 5, 2023",Mississippi,Natchez,2,2,4,0,0,0,0,No suspects
2,2,2589790,"May 5, 2023",Mississippi,Vicksburg,4,0,4,0,0,0,0,No suspects
3,3,2589699,"May 5, 2023",Mississippi,Ocean Springs,5,1,6,0,0,0,0,No suspects
4,4,2587918,"May 3, 2023",Illinois,Chicago,4,0,4,0,0,0,0,No suspects


In [4]:
# Check the dataframe dimensions as (rows, columns)
df.shape

(4232, 13)

## 2. Cleaning and Deriving Data

In [5]:
# Remove the 'Unnamed: 0' column (presumably a row index written out by an
# earlier export - confirm). Labels are passed as a list, the ordered
# list-like form that DataFrame.drop documents, rather than as a set.
df = df.drop(columns=['Unnamed: 0'])

In [6]:
# Preview the dataframe after dropping the column
df.head()

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Victims Injured,Victims Killed,Total Victims,Suspects Injured,Suspects Killed,Suspects Arrested,Total Suspects,Suspect Category
0,2589765,"May 6, 2023",Ohio,Columbus,3,1,4,1,0,0,1,Single suspect
1,2589804,"May 5, 2023",Mississippi,Natchez,2,2,4,0,0,0,0,No suspects
2,2589790,"May 5, 2023",Mississippi,Vicksburg,4,0,4,0,0,0,0,No suspects
3,2589699,"May 5, 2023",Mississippi,Ocean Springs,5,1,6,0,0,0,0,No suspects
4,2587918,"May 3, 2023",Illinois,Chicago,4,0,4,0,0,0,0,No suspects


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for 'Total Victims'
df['Total Victims'].describe()

count    4232.000000
mean        5.135633
std         8.101754
min         0.000000
25%         4.000000
50%         4.000000
75%         5.000000
max       499.000000
Name: Total Victims, dtype: float64

In [18]:
# Create a new column grouping 'Total Victims' into a 'Casualty Category':
#   'Low'    -> 0 to 4 victims
#   'Medium' -> 5 to 25 victims
#   'High'   -> more than 25 victims
LOW_MAX = 4      # upper bound (inclusive) of the 'Low' category
MEDIUM_MAX = 25  # upper bound (inclusive) of the 'Medium' category


def casualty_category(total_victims):
    """Return the casualty label for a single incident's victim count.

    Missing or negative counts return NaN instead of falling through to
    'High', so invalid input stays visible as a missing value rather than
    being counted as a high-casualty incident.
    """
    if pd.isna(total_victims) or total_victims < 0:
        return np.nan
    if total_victims <= LOW_MAX:
        return 'Low'
    if total_victims <= MEDIUM_MAX:
        return 'Medium'
    return 'High'


# One label per row, in the same order as the dataframe
result = [casualty_category(value) for value in df['Total Victims']]

In [19]:
# Inspect the generated list of category labels
result

['Low',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Low',
 'High',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Low',
 'Medium',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Medium',
 'Medium',
 'Medium',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Medium',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Low',
 'Medium',
 'Medium',

In [20]:
# Attach the list of labels to the dataframe as a new 'Casualty Category' column
# (the list is assigned positionally, one label per row)
df['Casualty Category'] = result

In [21]:
# Preview the dataframe with the new 'Casualty Category' column
df.head()

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Victims Injured,Victims Killed,Total Victims,Suspects Injured,Suspects Killed,Suspects Arrested,Total Suspects,Suspect Category,Total Incients,Casualty Category
0,2589765,"May 6, 2023",Ohio,Columbus,3,1,4,1,0,0,1,Single suspect,1,Low
1,2589804,"May 5, 2023",Mississippi,Natchez,2,2,4,0,0,0,0,No suspects,3,Low
2,2589790,"May 5, 2023",Mississippi,Vicksburg,4,0,4,0,0,0,0,No suspects,3,Low
3,2589699,"May 5, 2023",Mississippi,Ocean Springs,5,1,6,0,0,0,0,No suspects,3,Medium
4,2587918,"May 3, 2023",Illinois,Chicago,4,0,4,0,0,0,0,No suspects,2,Low


In [22]:
# Count rows per casualty category; dropna=False also reports missing values as their own group
df['Casualty Category'].value_counts(dropna = False)

Low       2465
Medium    1754
High        13
Name: Casualty Category, dtype: int64

## 3. Export Data

In [23]:
# Write the prepared dataframe for Tableau. index=False keeps the row index
# out of the file, so it is not read back later as another 'Unnamed: 0' column.
df.to_csv(os.path.join(path, '02 Data', 'Prepared Data', 'Mass_Shootings_Tableau.csv'), index=False)