In [23]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json
import urllib
import time

# Record offsets at which each paged API request starts: 0, 2000, ..., 176000.
# 178000 // 2000 gives the 89 pages needed to walk the whole dataset.
arr = np.arange(178000 // 2000) * 2000
arr

array([     0,   2000,   4000,   6000,   8000,  10000,  12000,  14000,
        16000,  18000,  20000,  22000,  24000,  26000,  28000,  30000,
        32000,  34000,  36000,  38000,  40000,  42000,  44000,  46000,
        48000,  50000,  52000,  54000,  56000,  58000,  60000,  62000,
        64000,  66000,  68000,  70000,  72000,  74000,  76000,  78000,
        80000,  82000,  84000,  86000,  88000,  90000,  92000,  94000,
        96000,  98000, 100000, 102000, 104000, 106000, 108000, 110000,
       112000, 114000, 116000, 118000, 120000, 122000, 124000, 126000,
       128000, 130000, 132000, 134000, 136000, 138000, 140000, 142000,
       144000, 146000, 148000, 150000, 152000, 154000, 156000, 158000,
       160000, 162000, 164000, 166000, 168000, 170000, 172000, 174000,
       176000])

In [24]:
# The ArcGIS query endpoint returns at most 2000 records per request. The full
# dataset can be downloaded from the site as a single JSON file, but it can also
# be fetched through the API by paging through the ~178000 records in 89 requests.

base_url = (
    "https://services.arcgis.com/v400IkDOw1ad7Yad/arcgis/rest/services/"
    "Fire_Incidents_Public/FeatureServer/0/query?"
)

# Attribute columns requested from the service, sent as one comma-separated value.
field_list = ",".join((
    "OBJECTID",
    "incident_number",
    "incident_type",
    "incident_type_description",
    "arrive_date_time",
    "dispatch_date_time",
    "exposure",
    "platoon",
    "station",
    "address",
    "address2",
    "apt_room",
    "GlobalID",
    "cleared_date_time",
))

# where=1%3D1 is the URL-encoded form of "1=1", i.e. no row filter.
parameters = "where=1%3D1&outFields=" + field_list + "&outSR=4326&f=json"

# resultOffset is the record to start with; resultRecordCount is the number of
# records to return (max 2000). There are 177,738 rows, so 89 requests are needed.
set_length = 1
records = '&resultOffset=' + str(set_length) + '&resultRecordCount=2000'

# Example of a single request (left disabled):
# fire_data = requests.get(base_url + parameters + records).json()


In [25]:
# Looks like we can iterate through the calls to get all the records.
# Here is a function that iterates through a list and returns all the JSON data to a hard file

def arcgis_api(sets, page_size=2000, timeout=60):
    """Download the fire-incident records page by page and dump them to disk.

    For every offset in ``sets`` one request is sent to the ArcGIS query
    endpoint (module-level ``base_url`` + ``parameters``). Each successful JSON
    payload is collected, and the whole collection is written as one JSON list
    to ``fire<timestamp>.txt``. Every request and its outcome is recorded in
    ``log_<timestamp>.txt``.

    A failed page (network error, timeout, HTTP error status, invalid JSON, or
    a payload without a ``features`` key) is logged and skipped, so the pages
    that did download are still saved.

    Parameters
    ----------
    sets : iterable of int
        Record offsets to request (used as ``resultOffset``).
    page_size : int, optional
        Number of records requested per call (``resultRecordCount``).
        Defaults to 2000.
    timeout : float, optional
        Seconds to wait for each HTTP request before treating it as failed.

    Returns
    -------
    None
        Results are written to disk, not returned.
    """
    fire_data = []
    # One timestamp names both the log file and the data file of this run.
    timestr = time.strftime('%Y%m%d-%H%M%S')
    log = 'log_' + timestr + '.txt'
    out_name = 'fire' + timestr + '.txt'
    print(f'Logging API calls in {log}')

    with open(log, 'a+') as f:
        for n in sets:
            status = (f'Getting data for records {n}: {n+page_size}... ')
            print(status)
            url = base_url + parameters + f'&resultOffset={n}&resultRecordCount={page_size}'
            # Log the URL before the call so failed requests are traceable too.
            f.write(url + ' ')
            try:
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()
                records = response.json()
                # Downstream parsing indexes ['features']; a payload without it
                # (e.g. an error document) must not be stored as a success.
                if not isinstance(records, dict) or 'features' not in records:
                    raise ValueError(f'unexpected response: {str(records)[:200]}')
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError also covers JSON decoding failures from .json().
                msg = f'There was an error with the set starting with record {n}: {e}\n'
            else:
                fire_data.append(records)
                msg = 'Success!\n'
            f.write(msg)
            print(msg)
            # Pause between calls (also after a failure) to go easy on the service.
            time.sleep(2)

    # 'w' rather than append: appending a second JSON document would make the
    # file unreadable for json.load.
    with open(out_name, 'w') as file:
        print(f'Dumping json data to {out_name}')
        json.dump(fire_data, file)



In [26]:
# Call the function with the array of offsets created above.
# This makes the 89 API calls and saves all the data to a fire<timestamp>.txt file.
# A log file of the transactions (log_<timestamp>.txt) is written as well.
# arcgis_api(arr)


In [27]:
# START HERE!!!
# Load a previously saved JSON dump (presumably produced by arcgis_api — the
# name follows its 'fire' + timestamp + '.txt' pattern) instead of calling the API.
firefile = 'fire20190112-113945.txt'
with open(firefile) as handle:
    fire_data = json.loads(handle.read())

# Show the geometry of the very first feature as a structure check.
fire_data[0]['features'][0]['geometry']

{'x': -78.62660452385826, 'y': 35.870212857675455}

In [28]:
# Split every downloaded feature into its attribute record and its coordinates.
# The three lists are always extended together, exactly once per feature, so
# they stay aligned by position and can later be combined into one DataFrame.
incidents = []
lat = []
lon = []
for result_set in fire_data:
    for feature in result_set['features']:
        # A feature may come without geometry (or with only one coordinate);
        # the string 'missing' marks each absent value.
        geometry = feature.get('geometry') or {}
        incidents.append(feature.get('attributes', {}))
        lat.append(geometry.get('y', 'missing'))
        lon.append(geometry.get('x', 'missing'))
len(incidents)

175738

In [29]:
# Build the incident table: one row per attribute dict collected in `incidents`.
df = pd.DataFrame(incidents)

In [30]:
# Preview the first rows to check which columns were built from the attributes.
df.head()

Unnamed: 0,GlobalID,OBJECTID,address,address2,apt_room,arrive_date_time,cleared_date_time,dispatch_date_time,exposure,incident_number,incident_type,incident_type_description,platoon,station
0,5c2e9c89-78d7-4348-bd95-23a69f4039aa,474765,"6647 LAKE HILL DR RALEIGH, NC 27601",,0.0,1195125000000.0,1195126000000.0,1195125000000.0,0,07-0031665,,,,
1,b6bdde67-94b6-409d-a17d-55498242caf1,474766,"539 METHOD RD RALEIGH, NC 27606",,,1214720000000.0,1214722000000.0,1214720000000.0,0,08-0017918,,,,
2,46aba405-7f74-4e64-a9ef-2af357ab2cdd,474767,"2100 RUARK CT RALEIGH, NC 27601",,202.0,1226982000000.0,1226982000000.0,1226982000000.0,0,08-0032426,,,,
3,b58df98a-c21d-4a2e-a00d-b9b66087a822,474768,"1216 BEVERLY DR RALEIGH, NC 27601",,,1187737000000.0,1187737000000.0,1187736000000.0,0,07-0023051,444.0,Power line down,A,12.0
4,eb6f5e12-dec5-4f55-9746-d2867a13d9e6,474769,"8332 SHILOH CREEK CT RALEIGH, NC 27603",,,1382220000000.0,1382220000000.0,1382219000000.0,0,13-0030918,150.0,Outside rubbish fire Other,B,22.0


In [31]:
# Attach the coordinate lists as new columns; list values are matched to the
# rows by position, so both lists must have exactly one entry per row of df.
for column_name, column_values in (('Longitude', lon), ('Latitude', lat)):
    df[column_name] = column_values

In [33]:
# Preview again to confirm the Longitude and Latitude columns are present.
df.head()

Unnamed: 0,GlobalID,OBJECTID,address,address2,apt_room,arrive_date_time,cleared_date_time,dispatch_date_time,exposure,incident_number,incident_type,incident_type_description,platoon,station,Longitude,Latitude
0,5c2e9c89-78d7-4348-bd95-23a69f4039aa,474765,"6647 LAKE HILL DR RALEIGH, NC 27601",,0.0,1195125000000.0,1195126000000.0,1195125000000.0,0,07-0031665,,,,,-78.6266,35.8702
1,b6bdde67-94b6-409d-a17d-55498242caf1,474766,"539 METHOD RD RALEIGH, NC 27606",,,1214720000000.0,1214722000000.0,1214720000000.0,0,08-0017918,,,,,-78.6936,35.7929
2,46aba405-7f74-4e64-a9ef-2af357ab2cdd,474767,"2100 RUARK CT RALEIGH, NC 27601",,202.0,1226982000000.0,1226982000000.0,1226982000000.0,0,08-0032426,,,,,-78.6278,35.8122
3,b58df98a-c21d-4a2e-a00d-b9b66087a822,474768,"1216 BEVERLY DR RALEIGH, NC 27601",,,1187737000000.0,1187737000000.0,1187736000000.0,0,07-0023051,444.0,Power line down,A,12.0,-78.5954,35.7612
4,eb6f5e12-dec5-4f55-9746-d2867a13d9e6,474769,"8332 SHILOH CREEK CT RALEIGH, NC 27603",,,1382220000000.0,1382220000000.0,1382219000000.0,0,13-0030918,150.0,Outside rubbish fire Other,B,22.0,-78.5373,35.8916


In [34]:
# Save the combined table as CSV, with a header row and without the index.
# float_format='%.f' writes floating-point values with no decimal places.
# NOTE(review): if Longitude/Latitude are stored as a float column, this format
# would also round the coordinates to whole degrees — confirm the written file.
# NOTE(review): assumes the "output" directory already exists — confirm.
df.to_csv(
    "output/raw_fire_data.csv",
    float_format='%.f',
    header=True,
    index=False,
)