## Cleaning Fire Incident Data
- Removed 2007 and 2019 as they were incomplete.
- Formatted datetime columns
- Removed records with no incident type, missing coordinates, missing dispatch date
- Added date and year columns for merging with weather, population data respectively
- Dropped unnecessary columns
- Export unified, cleaned dataset to csv for team analysis

In [1]:
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date

In [2]:
# Importing the cleaned/combined data from the csv

firefile = './output/raw_fire_data.csv'
df = pd.read_csv(firefile, skip_blank_lines=True, encoding='UTF-8', parse_dates=True,infer_datetime_format=True, low_memory=False)
len(df)

175738

In [3]:
missing = './output/extra_raw_fire_data.csv'
df2 = pd.read_csv(missing, skip_blank_lines=True, encoding='UTF-8',parse_dates=True, infer_datetime_format=True)
len(df2)


2000

In [4]:
# The full dataset available from Raleigh Open Data now here thanks to append
df3 = df.append(df2, ignore_index=True)
len(df3)

177738

In [5]:
# Removing anything missing coordinate information
df3 = df3.loc[df3['Longitude']!='missing',:]
len(df3)

176289

In [6]:
# Remove anything without an incident type
df3 = df3.loc[df3['incident_type'].notnull()]
len(df3)

173246

In [7]:
# Remove anything without a dispatch date/time since we need something for the date grouping!
df3 = df3.loc[df3['dispatch_date_time'].notnull()]
len(df3)

171820

In [8]:
df3['dispatch_date_time'] = df3['dispatch_date_time']/1000
df3['arrive_date_time'] = df3['arrive_date_time']/1000
df3['cleared_date_time'] = df3['cleared_date_time']/1000

In [9]:
df3['dispatch_date_time'] = pd.to_datetime(df3['dispatch_date_time'], errors='coerce', yearfirst=True, unit='s')
df3['arrive_date_time'] = pd.to_datetime(df3['arrive_date_time'], errors='coerce', yearfirst=True, unit='s')
df3['cleared_date_time'] = pd.to_datetime(df3['cleared_date_time'], errors='coerce', yearfirst=True, unit='s')

In [10]:
# df3['dispatch_date_time'] = df3['dispatch_date_time'].dt.tz_localize('US/Eastern')

In [11]:
df3.head()

Unnamed: 0,GlobalID,OBJECTID,address,address2,apt_room,arrive_date_time,cleared_date_time,dispatch_date_time,exposure,incident_number,incident_type,incident_type_description,platoon,station,Longitude,Latitude
3,b58df98a-c21d-4a2e-a00d-b9b66087a822,474768,"1216 BEVERLY DR RALEIGH, NC 27601",,,2007-08-21 22:52:00,2007-08-21 22:58:00,2007-08-21 22:47:00,0,07-0023051,444.0,Power line down,A,12.0,-78.59542156770867,35.76121328320593
4,eb6f5e12-dec5-4f55-9746-d2867a13d9e6,474769,"8332 SHILOH CREEK CT RALEIGH, NC 27603",,,2013-10-19 21:52:00,2013-10-19 21:59:00,2013-10-19 21:47:00,0,13-0030918,150.0,Outside rubbish fire Other,B,22.0,-78.53732007304758,35.891622339123025
5,8df5e743-2b41-4b22-9012-6e8cbe9327e2,474770,"7513 IDOLBROOK LN RALEIGH, NC 27615",,,2014-02-14 21:49:00,2014-02-14 21:51:00,2014-02-14 21:46:00,0,14-0004846,510.0,Person in distress Other,A,4.0,-78.64763063692541,35.8781858441652
6,907e5339-78fc-4873-90e2-94ae47f225e4,474771,"1205 S BLOODWORTH ST RALEIGH, NC 27601",,,2012-04-29 05:17:00,2012-04-29 05:38:00,2012-04-29 05:16:00,0,12-0011779,440.0,Electrical wiring/equipment problem Other,A,1.0,-78.63397664076385,35.765387817948216
7,d86875b1-1b90-47cb-94fa-23031de9f995,474772,"7628 VALVIEW ST RALEIGH, NC 27601",,,2012-04-28 23:46:00,2012-04-28 23:52:00,2012-04-28 23:43:00,0,12-9010131,561.0,Unauthorized burning,A,18.0,-78.69139554786135,35.88444729034989


In [12]:
# Adding a year column for later groupby's
df3['Year'] = pd.DatetimeIndex(df3['dispatch_date_time']).year

In [13]:
years = df3.groupby(['Year']).OBJECTID.count()
years

Year
2007    13389
2008    13705
2009    13026
2010    13531
2011    13893
2012    13607
2013    13943
2014    14415
2015    15068
2016    15643
2017    18051
2018    13541
2019        8
Name: OBJECTID, dtype: int64

In [14]:
# Making a date column for merging the weather data later
df3['Date'] = df3['dispatch_date_time'].astype(str).str.split(' ',expand=True)[0]

In [15]:
# Removing 2007 data that had a lot of nulls
df3 = df3.loc[df3['Year'] > 2007]
len(df3)

158431

In [16]:
# Removing 2019 because the year just started
df3 = df3.loc[df3['Year'] < 2019]
len(df3)

158423

In [17]:
df3.set_index('OBJECTID')

Unnamed: 0_level_0,GlobalID,address,address2,apt_room,arrive_date_time,cleared_date_time,dispatch_date_time,exposure,incident_number,incident_type,incident_type_description,platoon,station,Longitude,Latitude,Year,Date
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
474769,eb6f5e12-dec5-4f55-9746-d2867a13d9e6,"8332 SHILOH CREEK CT RALEIGH, NC 27603",,,2013-10-19 21:52:00,2013-10-19 21:59:00,2013-10-19 21:47:00,0,13-0030918,150.0,Outside rubbish fire Other,B,22.0,-78.53732007304758,35.891622339123025,2013,2013-10-19
474770,8df5e743-2b41-4b22-9012-6e8cbe9327e2,"7513 IDOLBROOK LN RALEIGH, NC 27615",,,2014-02-14 21:49:00,2014-02-14 21:51:00,2014-02-14 21:46:00,0,14-0004846,510.0,Person in distress Other,A,4.0,-78.64763063692541,35.8781858441652,2014,2014-02-14
474771,907e5339-78fc-4873-90e2-94ae47f225e4,"1205 S BLOODWORTH ST RALEIGH, NC 27601",,,2012-04-29 05:17:00,2012-04-29 05:38:00,2012-04-29 05:16:00,0,12-0011779,440.0,Electrical wiring/equipment problem Other,A,1.0,-78.63397664076385,35.765387817948216,2012,2012-04-29
474772,d86875b1-1b90-47cb-94fa-23031de9f995,"7628 VALVIEW ST RALEIGH, NC 27601",,,2012-04-28 23:46:00,2012-04-28 23:52:00,2012-04-28 23:43:00,0,12-9010131,561.0,Unauthorized burning,A,18.0,-78.69139554786135,35.88444729034989,2012,2012-04-28
474773,507c4929-34a1-457f-80f2-3af654aea4fb,"4201 MANGROVE DR RALEIGH, NC 27603",,,2014-01-26 22:35:00,2014-01-26 22:38:00,2014-01-26 22:31:00,0,14-0002916,520.0,Water problem Other,B,28.0,-78.51656019368116,35.87272474515293,2014,2014-01-26
474774,ffe5877d-cb56-4faa-9f0b-b330773517b6,"250 E DAVIE ST RALEIGH, NC 27601",,,2012-02-28 00:58:00,2012-02-28 01:13:00,2012-02-28 00:56:00,0,12-0005738,531.0,Smoke or odor removal,C,1.0,-78.63570879834582,35.775440410967136,2012,2012-02-28
474775,5c7a213b-7fd8-4b7d-8cc9-2fa01d160b33,"7013 CEDAR BEND CT RALEIGH, NC 27601",,,2011-12-15 13:18:00,2011-12-15 13:29:00,2011-12-15 13:10:00,0,11-0036227,554.0,Assist invalid,A,18.0,-78.67235898395957,35.87947969095349,2011,2011-12-15
474776,239196e9-c64c-4dd2-9c37-58f6ac5f2d1a,"3829 BROWNING PL RALEIGH, NC 27601",,,2014-04-26 05:35:00,2014-04-26 05:43:00,2014-04-26 05:32:00,0,14-0012080,700.0,False alarm or false call Other,A,9.0,-78.63418234687909,35.83090808635709,2014,2014-04-26
474777,0f83feb0-8dad-4f6a-8ef7-16a4cca98d46,"5614 APALACHICULA DR RALEIGH, NC 27603",,,2014-05-01 01:02:00,2014-05-01 01:18:00,2014-05-01 00:55:00,0,14-0012609,511.0,Lock-out,B,22.0,-78.54641634893586,35.88755294805183,2014,2014-05-01
474778,9046e10a-2654-4c19-b0a6-b4e18b8ffa67,"8431 BERKSHIRE VILLAGE CT RALEIGH, NC 27603",,,2010-05-15 17:49:00,2010-05-15 17:59:00,2010-05-15 17:45:00,0,10-0013764,510.0,Person in distress Other,C,22.0,-78.5578598238469,35.886912510192204,2010,2010-05-15


In [18]:
df3.drop(['GlobalID','address2','apt_room','exposure','incident_number'], axis=1)

Unnamed: 0,OBJECTID,address,arrive_date_time,cleared_date_time,dispatch_date_time,incident_type,incident_type_description,platoon,station,Longitude,Latitude,Year,Date
4,474769,"8332 SHILOH CREEK CT RALEIGH, NC 27603",2013-10-19 21:52:00,2013-10-19 21:59:00,2013-10-19 21:47:00,150.0,Outside rubbish fire Other,B,22.0,-78.53732007304758,35.891622339123025,2013,2013-10-19
5,474770,"7513 IDOLBROOK LN RALEIGH, NC 27615",2014-02-14 21:49:00,2014-02-14 21:51:00,2014-02-14 21:46:00,510.0,Person in distress Other,A,4.0,-78.64763063692541,35.8781858441652,2014,2014-02-14
6,474771,"1205 S BLOODWORTH ST RALEIGH, NC 27601",2012-04-29 05:17:00,2012-04-29 05:38:00,2012-04-29 05:16:00,440.0,Electrical wiring/equipment problem Other,A,1.0,-78.63397664076385,35.765387817948216,2012,2012-04-29
7,474772,"7628 VALVIEW ST RALEIGH, NC 27601",2012-04-28 23:46:00,2012-04-28 23:52:00,2012-04-28 23:43:00,561.0,Unauthorized burning,A,18.0,-78.69139554786135,35.88444729034989,2012,2012-04-28
8,474773,"4201 MANGROVE DR RALEIGH, NC 27603",2014-01-26 22:35:00,2014-01-26 22:38:00,2014-01-26 22:31:00,520.0,Water problem Other,B,28.0,-78.51656019368116,35.87272474515293,2014,2014-01-26
9,474774,"250 E DAVIE ST RALEIGH, NC 27601",2012-02-28 00:58:00,2012-02-28 01:13:00,2012-02-28 00:56:00,531.0,Smoke or odor removal,C,1.0,-78.63570879834582,35.775440410967136,2012,2012-02-28
10,474775,"7013 CEDAR BEND CT RALEIGH, NC 27601",2011-12-15 13:18:00,2011-12-15 13:29:00,2011-12-15 13:10:00,554.0,Assist invalid,A,18.0,-78.67235898395957,35.87947969095349,2011,2011-12-15
11,474776,"3829 BROWNING PL RALEIGH, NC 27601",2014-04-26 05:35:00,2014-04-26 05:43:00,2014-04-26 05:32:00,700.0,False alarm or false call Other,A,9.0,-78.63418234687909,35.83090808635709,2014,2014-04-26
12,474777,"5614 APALACHICULA DR RALEIGH, NC 27603",2014-05-01 01:02:00,2014-05-01 01:18:00,2014-05-01 00:55:00,511.0,Lock-out,B,22.0,-78.54641634893586,35.88755294805183,2014,2014-05-01
13,474778,"8431 BERKSHIRE VILLAGE CT RALEIGH, NC 27603",2010-05-15 17:49:00,2010-05-15 17:59:00,2010-05-15 17:45:00,510.0,Person in distress Other,C,22.0,-78.5578598238469,35.886912510192204,2010,2010-05-15


In [19]:
df3.loc[df3['OBJECTID']==474769]

Unnamed: 0,GlobalID,OBJECTID,address,address2,apt_room,arrive_date_time,cleared_date_time,dispatch_date_time,exposure,incident_number,incident_type,incident_type_description,platoon,station,Longitude,Latitude,Year,Date
4,eb6f5e12-dec5-4f55-9746-d2867a13d9e6,474769,"8332 SHILOH CREEK CT RALEIGH, NC 27603",,,2013-10-19 21:52:00,2013-10-19 21:59:00,2013-10-19 21:47:00,0,13-0030918,150.0,Outside rubbish fire Other,B,22.0,-78.53732007304758,35.891622339123025,2013,2013-10-19


In [20]:
df3.to_csv("./output/all_with_no_incident_nulls.csv", index=False, header=True)