In [34]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path

root = Path('FireDepartmentCallsSelected.csv')
fire = pd.read_csv(root)
fire.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315323 entries, 0 to 315322
Data columns (total 12 columns):
 #   Column                                Non-Null Count   Dtype  
---  ------                                --------------   -----  
 0   Call Type                             315323 non-null  object 
 1   Call Date                             315323 non-null  object 
 2   Received DtTm                         315323 non-null  object 
 3   Dispatch DtTm                         315323 non-null  object 
 4   Response DtTm                         308541 non-null  object 
 5   On Scene DtTm                         255840 non-null  object 
 6   Station Area                          315320 non-null  float64
 7   ALS Unit                              315323 non-null  bool   
 8   Call Type Group                       313414 non-null  object 
 9   Unit Type                             315323 non-null  object 
 10  Location                              315323 non-null  object 
 11  

In [35]:
# remove missing values
fire = fire.dropna()
print(fire.shape)
fire.head()

(254240, 12)


Unnamed: 0,Call Type,Call Date,Received DtTm,Dispatch DtTm,Response DtTm,On Scene DtTm,Station Area,ALS Unit,Call Type Group,Unit Type,Location,Neighborhooods - Analysis Boundaries
0,Structure Fire,07/25/2019,07/25/2019 07:16:45 PM,07/25/2019 07:18:47 PM,07/25/2019 07:21:12 PM,07/25/2019 07:21:39 PM,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
1,Structure Fire,07/25/2019,07/25/2019 07:16:45 PM,07/25/2019 07:18:47 PM,07/25/2019 07:19:14 PM,07/25/2019 07:20:43 PM,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
2,Structure Fire,07/25/2019,07/25/2019 07:16:45 PM,07/25/2019 07:18:47 PM,07/25/2019 07:19:29 PM,07/25/2019 07:20:26 PM,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
4,Structure Fire,07/25/2019,07/25/2019 07:16:45 PM,07/25/2019 07:18:47 PM,07/25/2019 07:19:32 PM,07/25/2019 07:21:57 PM,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill
5,Structure Fire,07/25/2019,07/25/2019 07:16:45 PM,07/25/2019 07:18:47 PM,07/25/2019 07:19:20 PM,07/25/2019 07:21:54 PM,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill


In [24]:
# check column types
fire.dtypes

Call Type                                object
Call Date                                object
Received DtTm                            object
Dispatch DtTm                            object
Response DtTm                            object
On Scene DtTm                            object
Station Area                            float64
ALS Unit                                   bool
Call Type Group                          object
Unit Type                                object
Location                                 object
Neighborhooods - Analysis Boundaries     object
dtype: object

In [25]:
# convert datetime columns from strings to datetime
fire.loc[:,['Received DtTm', 'Dispatch DtTm', 'Response DtTm', 'On Scene DtTm']] = (
    fire.loc[:, 
             ['Received DtTm', 'Dispatch DtTm', 'Response DtTm', 'On Scene DtTm']].apply(pd.to_datetime, format='%m/%d/%Y %I:%M:%S %p')
)
fire['Call Date'] = pd.to_datetime(fire['Call Date'], format='%m/%d/%Y')
fire.dtypes

Call Type                                       object
Call Date                               datetime64[ns]
Received DtTm                           datetime64[ns]
Dispatch DtTm                           datetime64[ns]
Response DtTm                           datetime64[ns]
On Scene DtTm                           datetime64[ns]
Station Area                                   float64
ALS Unit                                          bool
Call Type Group                                 object
Unit Type                                       object
Location                                        object
Neighborhooods - Analysis Boundaries            object
dtype: object

In [26]:
fire.head()

Unnamed: 0,Call Type,Call Date,Received DtTm,Dispatch DtTm,Response DtTm,On Scene DtTm,Station Area,ALS Unit,Call Type Group,Unit Type,Location,Neighborhooods - Analysis Boundaries
0,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:21:12,2019-07-25 19:21:39,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
1,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:14,2019-07-25 19:20:43,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
2,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:29,2019-07-25 19:20:26,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill
4,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:32,2019-07-25 19:21:57,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill
5,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:20,2019-07-25 19:21:54,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill


In [27]:
# calculate difference in total seconds between on scene time and received, dispatch, and response time
fire['diff_received_to_onScene'] = fire['On Scene DtTm'] - fire['Received DtTm']
fire['diff_received_to_onScene'] = fire['diff_received_to_onScene'].apply(lambda x: x.total_seconds())
fire['diff_dispatch_to_onScene'] = fire['On Scene DtTm'] - fire['Dispatch DtTm']
fire['diff_dispatch_to_onScene'] = fire['diff_dispatch_to_onScene'].apply(lambda x: x.total_seconds())
fire['diff_response_to_onScene'] = fire['On Scene DtTm'] - fire['Response DtTm']
fire['diff_response_to_onScene'] = fire['diff_response_to_onScene'].apply(lambda x: x.total_seconds())

fire.head()

Unnamed: 0,Call Type,Call Date,Received DtTm,Dispatch DtTm,Response DtTm,On Scene DtTm,Station Area,ALS Unit,Call Type Group,Unit Type,Location,Neighborhooods - Analysis Boundaries,diff_received_to_onScene,diff_dispatch_to_onScene,diff_response_to_onScene
0,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:21:12,2019-07-25 19:21:39,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,294.0,172.0,27.0
1,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:14,2019-07-25 19:20:43,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,238.0,116.0,89.0
2,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:29,2019-07-25 19:20:26,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,221.0,99.0,57.0
4,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:32,2019-07-25 19:21:57,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill,312.0,190.0,145.0
5,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:20,2019-07-25 19:21:54,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill,309.0,187.0,154.0


In [28]:
# convert difference to total time in seconds
fire.describe()

Unnamed: 0,Station Area,diff_received_to_onScene,diff_dispatch_to_onScene,diff_response_to_onScene
count,254240.0,254240.0,254240.0,254240.0
mean,17.749807,531.9988,372.1479,317.4566
std,14.211759,15807.06,15802.61,15802.75
min,1.0,-7965518.0,-7965594.0,-7965637.0
25%,5.0,325.0,214.0,145.0
50%,14.0,439.0,301.0,238.0
75%,32.0,659.0,483.0,440.0
max,51.0,65175.0,16765.0,16758.0


In [29]:
# remove rows where the difference is negative b/c this indicates a clerical reporting error
fire = fire.loc[(fire['diff_received_to_onScene'] >= 0) & (fire['diff_dispatch_to_onScene'] >= 0) &
                (fire['diff_response_to_onScene'] >= 0), :]
fire.describe() # much better

Unnamed: 0,Station Area,diff_received_to_onScene,diff_dispatch_to_onScene,diff_response_to_onScene
count,254204.0,254204.0,254204.0,254204.0
mean,17.750201,564.096021,403.960603,349.279823
std,14.211808,495.608634,335.862606,343.526969
min,1.0,0.0,0.0,0.0
25%,5.0,325.0,214.0,145.0
50%,14.0,439.0,301.0,238.0
75%,32.0,659.0,483.0,440.0
max,51.0,65175.0,16765.0,16758.0


In [30]:
# normalize difference in time to compare between call type group
min_max_scaler = MinMaxScaler()
scaled_diff_onScene = fire.loc[:, fire.filter(regex='diff_').columns].values
fire[['norm_diff_received_to_onScene', 'norm_diff_dispatch_to_onScene', 'norm_diff_response_to_onScene']] = pd.DataFrame(min_max_scaler.fit_transform(scaled_diff_onScene))
fire.head()

Unnamed: 0,Call Type,Call Date,Received DtTm,Dispatch DtTm,Response DtTm,On Scene DtTm,Station Area,ALS Unit,Call Type Group,Unit Type,Location,Neighborhooods - Analysis Boundaries,diff_received_to_onScene,diff_dispatch_to_onScene,diff_response_to_onScene,norm_diff_received_to_onScene,norm_diff_dispatch_to_onScene,norm_diff_response_to_onScene
0,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:21:12,2019-07-25 19:21:39,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,294.0,172.0,27.0,0.004511,0.010259,0.001611
1,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:14,2019-07-25 19:20:43,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,238.0,116.0,89.0,0.003652,0.006919,0.005311
2,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:29,2019-07-25 19:20:26,41.0,True,Alarm,ENGINE,"(37.792059516639355, -122.41694742808038)",Nob Hill,221.0,99.0,57.0,0.003391,0.005905,0.003401
4,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:32,2019-07-25 19:21:57,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill,312.0,190.0,145.0,0.004741,0.011154,0.00919
5,Structure Fire,2019-07-25,2019-07-25 19:16:45,2019-07-25 19:18:47,2019-07-25 19:19:20,2019-07-25 19:21:54,41.0,False,Alarm,TRUCK,"(37.792059516639355, -122.41694742808038)",Nob Hill,309.0,187.0,154.0,0.01275,0.023442,0.021661


In [31]:
# fixed mispelled neighborhood column
fire.rename(columns={'Neighborhooods - Analysis Boundaries': 'Neighborhoods - Analysis Boundaries'}, inplace=True)
# write filtered data to csv
fire.to_csv('FireDepartmentCallsSelectedNAFilteredWithDiffs.csv', index=False)

In [32]:
fire.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 254204 entries, 0 to 315322
Data columns (total 18 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   Call Type                            254204 non-null  object        
 1   Call Date                            254204 non-null  datetime64[ns]
 2   Received DtTm                        254204 non-null  datetime64[ns]
 3   Dispatch DtTm                        254204 non-null  datetime64[ns]
 4   Response DtTm                        254204 non-null  datetime64[ns]
 5   On Scene DtTm                        254204 non-null  datetime64[ns]
 6   Station Area                         254204 non-null  float64       
 7   ALS Unit                             254204 non-null  bool          
 8   Call Type Group                      254204 non-null  object        
 9   Unit Type                            254204 non-null  object        
 

In [40]:
(1 - 254204.000000 / 254240.000000) * 100

0.014159848961614596