In [112]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
# Default Plotly renderer. The "+"-joined names request several renderers at once
# (VS Code, JupyterLab and the classic Notebook with plotly.js loaded from a CDN),
# so the figure is displayable in whichever of those front ends opens the notebook.
pio.renderers.default = "vscode+jupyterlab+notebook_connected"

- **Dataset(s) to be used:** [NYPD Shooting Incident Data (Historic)](https://data.cityofnewyork.us/Public-Safety/NYPD-Shooting-Incident-Data-Historic-/833y-fsy8/about_data)
- **Analysis question:** Which borough in New York City has the highest cumulative number of shootings, and during which time period of the day do these shootings occur most frequently?
- **Columns that will (likely) be used:**
  - OCCUR_TIME
  - BORO
- **Hypothesis**: Brooklyn has the highest cumulative number of shooting incidents, and the Evening period (6:00 PM - 12:00 AM) is when shootings occur most frequently.
- **Site URL:** [URL from Publish section]

The data is a breakdown of every shooting incident that occurred in NYC going back to 2006 through the end of the previous calendar year. This data is manually extracted every quarter and reviewed by the Office of Management Analysis and Planning before being posted on the NYPD website. Each record represents a shooting incident in NYC and includes information about the event, the location and time of occurrence. In addition, information related to suspect and victim demographics is also included. This data can be used by the public to explore the nature of shooting/criminal activity. Please refer to the attached data footnotes for additional information about this dataset.

In [113]:
# Read the exported NYPD shooting-incident CSV (expected next to the notebook).
NYC_shooting = pd.read_csv(
    filepath_or_buffer="NYPD_Shooting_Incident_Data__Historic__20241206.csv",
)
# Show the first five rows as a quick sanity check of the loaded columns.
NYC_shooting.head(n=5)

Unnamed: 0,INCIDENT_KEY,OCCUR_DATE,OCCUR_TIME,BORO,LOC_OF_OCCUR_DESC,PRECINCT,JURISDICTION_CODE,LOC_CLASSFCTN_DESC,LOCATION_DESC,STATISTICAL_MURDER_FLAG,...,PERP_SEX,PERP_RACE,VIC_AGE_GROUP,VIC_SEX,VIC_RACE,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lon_Lat
0,244608249,05/05/2022,00:10:00,MANHATTAN,INSIDE,14,0.0,COMMERCIAL,VIDEO STORE,True,...,M,BLACK,25-44,M,BLACK,986050.0,214231.0,40.754692,-73.9935,POINT (-73.9935 40.754692)
1,247542571,07/04/2022,22:20:00,BRONX,OUTSIDE,48,0.0,STREET,(null),True,...,(null),(null),18-24,M,BLACK,1016802.0,250581.0,40.854402,-73.88233,POINT (-73.88233 40.854402)
2,84967535,05/27/2012,19:35:00,QUEENS,,103,0.0,,,False,...,,,18-24,M,BLACK,1048632.0,198262.0,40.710634,-73.767773,POINT (-73.76777349199995 40.71063412500007)
3,202853370,09/24/2019,21:00:00,BRONX,,42,0.0,,,False,...,M,UNKNOWN,25-44,M,BLACK,1014493.0,242565.0,40.832417,-73.890714,POINT (-73.89071440599997 40.832416753000075)
4,27078636,02/25/2007,21:00:00,BROOKLYN,,83,0.0,,,False,...,M,BLACK,25-44,M,BLACK,1009149.375,190104.703125,40.688443,-73.910219,POINT (-73.91021857399994 40.68844345900004)


In [114]:
def categorize_time(row):
    """Bucket a single time of day into one of four named periods.

    Parameters
    ----------
    row : str or object with an ``hour`` attribute
        One time value (despite the parameter name, this is a scalar and
        not a DataFrame row), e.g. ``"21:05:00"``. Strings are read as
        ``H:MM[:SS]``; the hour does not have to be zero-padded.

    Returns
    -------
    str or None
        ``"Morning"`` for hours 6-11, ``"Afternoon"`` for 12-17,
        ``"Evening"`` for 18-23 and ``"Night/Dawn"`` for every other hour
        (0-5). ``None`` is returned for a missing value (``None``/NaN) so
        that pandas counting and grouping skip it by default.

    Raises
    ------
    ValueError
        If the value is present but no hour can be parsed from it.
    """
    # Missing times cannot be bucketed; propagate them as missing instead of
    # failing on a str-vs-float comparison.
    if pd.isna(row):
        return None

    # Accept time-like objects directly; otherwise take the text before the
    # first ":" as the hour.
    hour = getattr(row, "hour", None)
    if hour is None:
        hour_text = str(row).strip().split(":", 1)[0]
        try:
            hour = int(hour_text)
        except ValueError:
            raise ValueError(f"Cannot parse a time of day from {row!r}") from None

    # Compare numerically: comparing the raw strings would misplace
    # non-zero-padded hours such as "9:30:00".
    if 6 <= hour < 12:
        return "Morning"
    elif 12 <= hour < 18:
        return "Afternoon"
    elif 18 <= hour < 24:
        return "Evening"
    else:
        return "Night/Dawn"

In [115]:
# Tag each incident with its time-of-day bucket (stored as a new column).
NYC_shooting['Time_Period'] = (
    NYC_shooting['OCCUR_TIME']
    .apply(categorize_time)
)
# Tally incidents per bucket; value_counts lists the most frequent bucket first.
time_counts = NYC_shooting['Time_Period'].value_counts()
time_counts

Time_Period
Evening       11343
Night/Dawn    10178
Afternoon      5183
Morning        1858
Name: count, dtype: int64

In [116]:
# Long-format table: one row per (borough, time period) pair with its incident count.
borough_time_data = (
    NYC_shooting
    .groupby(by=['BORO', 'Time_Period'])
    .size()
    .reset_index(name='Counts')
)
borough_time_data

Unnamed: 0,BORO,Time_Period,Counts
0,BRONX,Afternoon,1444
1,BRONX,Evening,3475
2,BRONX,Morning,492
3,BRONX,Night/Dawn,2965
4,BROOKLYN,Afternoon,2259
5,BROOKLYN,Evening,4592
6,BROOKLYN,Morning,784
7,BROOKLYN,Night/Dawn,3711
8,MANHATTAN,Afternoon,584
9,MANHATTAN,Evening,1505


In [117]:
# Wide-format view: boroughs as rows, time periods as columns, counts as cells.
# Any (borough, period) pair with no incidents is shown as 0 rather than NaN.
pivot_borough_time = (
    borough_time_data
    .pivot(index='BORO', columns='Time_Period', values='Counts')
    .fillna(value=0)
)
pivot_borough_time

Time_Period,Afternoon,Evening,Morning,Night/Dawn
BORO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BRONX,1444,3475,492,2965
BROOKLYN,2259,4592,784,3711
MANHATTAN,584,1505,229,1444
QUEENS,753,1474,300,1744
STATEN ISLAND,143,297,53,314


In [118]:
# Grouped bar chart: one cluster of bars per borough, one bar per time period.
fig = px.bar(
    data_frame=borough_time_data,
    x='BORO',
    y='Counts',
    color='Time_Period',
    barmode='group',
    labels={'BORO': 'Borough', 'Counts': 'Number of Incidents', 'Time_Period': 'Time Period'},
    title='Shooting Incidents by Borough and Time Period',
)

fig.show()