In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler


In [2]:
# Columns of the incident table that this analysis keeps.
columns = [
    'AcresBurned',
    'AirTankers',
    'AdminUnit',
    'Counties',
    'CountyIds',
    'Engines',
    'Latitude',
    'Longitude',
    'Started',
]

# Read the raw incident spreadsheet into a DataFrame.
path = 'California_Fire_Incidents.xlsx'
fire = pd.read_excel(path)

In [3]:
# Narrow the raw table to the selected columns, then display the result.
fire_df = fire.loc[:, columns]
fire_df

Unnamed: 0,AcresBurned,AirTankers,AdminUnit,Counties,CountyIds,Engines,Latitude,Longitude,Started
0,257314.0,,Stanislaus National Forest/Yosemite National Park,Tuolumne,55,,37.857000,-120.086000,2013-08-17T15:25:00Z
1,30274.0,,USFS Angeles National Forest/Los Angeles Count...,Los Angeles,19,,34.585595,-118.423176,2013-05-30T15:28:00Z
2,27531.0,,CAL FIRE Riverside Unit / San Bernardino Natio...,Riverside,33,,33.709500,-116.728850,2013-07-15T13:43:00Z
3,27440.0,,Tahoe National Forest,Placer,31,,39.120000,-120.650000,2013-08-10T16:30:00Z
4,24251.0,,Ventura County Fire/CAL FIRE,Ventura,56,117.0,0.000000,0.000000,2013-05-02T07:01:00Z
...,...,...,...,...,...,...,...,...,...
1631,9.0,,CAL FIRE / Riverside County Fire,Riverside,33,,33.827979,-117.499619,2019-10-10T12:08:00Z
1632,2.0,,CAL FIRE Nevada-Yuba-Placer Unit,Nevada,29,,39.409722,-121.000556,2019-06-28T15:03:04Z
1633,,,Yolo County Fire Protection District,Yolo,57,,38.734634,-121.729691,2019-11-25T12:02:02Z
1634,,,Camp Pendleton Marine Corps Base,San Diego,37,,33.351145,-117.403719,2019-10-22T19:20:44Z


In [4]:
# Count the missing values in each column.
fire_df.isnull().sum()

AcresBurned       3
AirTankers     1608
AdminUnit         0
Counties          0
CountyIds         0
Engines        1445
Latitude          0
Longitude         0
Started           0
dtype: int64

In [5]:
# Remove AirTankers and Engines (mostly missing per the null counts above)
# together with the free-text AdminUnit column.
fire_df = fire_df.drop(columns=['AirTankers', 'Engines', 'AdminUnit'])

In [6]:
# Preview the first rows of the trimmed frame.
fire_df.head()

Unnamed: 0,AcresBurned,Counties,CountyIds,Latitude,Longitude,Started
0,257314.0,Tuolumne,55,37.857,-120.086,2013-08-17T15:25:00Z
1,30274.0,Los Angeles,19,34.585595,-118.423176,2013-05-30T15:28:00Z
2,27531.0,Riverside,33,33.7095,-116.72885,2013-07-15T13:43:00Z
3,27440.0,Placer,31,39.12,-120.65,2013-08-10T16:30:00Z
4,24251.0,Ventura,56,0.0,0.0,2013-05-02T07:01:00Z


In [7]:
# Inspect column dtypes; per the output below, CountyIds and Started are
# stored as object rather than as numeric / datetime values.
fire_df.dtypes

AcresBurned    float64
Counties        object
CountyIds       object
Latitude       float64
Longitude      float64
Started         object
dtype: object

In [26]:
# Pull the year and month out of the 'Started' timestamp strings
# (e.g. '2013-08-17T15:25:00Z' -> year '2013', month '08').
# A vectorised .str.split replaces the row-by-row iterrows() loop, which
# split every value twice. The results are kept as plain lists of strings so
# later cells that consume `months` / `years` see the same values as before.
started_parts = fire_df['Started'].str.split('-')
years = started_parts.str[0].tolist()
months = started_parts.str[1].tolist()

In [33]:
# Swap the raw 'Started' timestamp for separate Month / Year columns.
# `months` and `years` hold strings (e.g. '08', '2013'); convert them to
# numbers so the columns are numeric rather than object dtype when the frame
# is later used as model input.
fire_df = fire_df.drop('Started', axis=1)
fire_df['Month'] = pd.to_numeric(months)
fire_df['Year'] = pd.to_numeric(years)
fire_df.head()

Unnamed: 0,AcresBurned,Counties,CountyIds,Latitude,Longitude,Month,Year
0,257314.0,Tuolumne,55,37.857,-120.086,8,2013
1,30274.0,Los Angeles,19,34.585595,-118.423176,5,2013
2,27531.0,Riverside,33,33.7095,-116.72885,7,2013
3,27440.0,Placer,31,39.12,-120.65,8,2013
4,24251.0,Ventura,56,0.0,0.0,5,2013


In [34]:
# Everything except the county identifiers becomes the model input;
# the county name / id columns are set aside as the labels.
features = fire_df.drop(columns=['Counties', 'CountyIds'])
labels = fire_df.loc[:, ['Counties', 'CountyIds']]

In [35]:
# Create a StandardScaler instance.
# NOTE(review): the scaler is not fitted or applied to `features` in any cell
# shown here — confirm whether scaling was intended before training.
scaler = StandardScaler()



Unnamed: 0,AcresBurned,Latitude,Longitude,Month,Year
0,257314.0,37.857000,-120.086000,08,2013
1,30274.0,34.585595,-118.423176,05,2013
2,27531.0,33.709500,-116.728850,07,2013
3,27440.0,39.120000,-120.650000,08,2013
4,24251.0,0.000000,0.000000,05,2013
...,...,...,...,...,...
1631,9.0,33.827979,-117.499619,10,2019
1632,2.0,39.409722,-121.000556,06,2019
1633,,38.734634,-121.729691,11,2019
1634,,33.351145,-117.403719,10,2019


## Rough draft model


In [15]:
import sys
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
# from tensorflow import keras
from tensorflow.keras import layers, Sequential, Input
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, concatenate, BatchNormalization
# from tensorflow.keras.layers.core import *
from tensorflow.keras.models import Model
from tensorflow.keras.layers.experimental import preprocessing
# from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input


In [23]:
def build_dnn(norm=None, hidden_units=(24, 16, 8), output_units=1, learning_rate=0.0001):
    """Build and compile a small fully connected Keras network.

    Parameters
    ----------
    norm : keras layer, optional
        Layer placed first in the model. When None, a BatchNormalization
        layer (momentum=0.98, epsilon=0.001) is used instead.
    hidden_units : sequence of int, optional
        Width of each hidden ReLU Dense layer, in order. The default
        (24, 16, 8) reproduces the original fixed architecture.
    output_units : int, optional
        Width of the final linear Dense layer. Defaults to 1; pass the number
        of target columns when training against a multi-column target.
    learning_rate : float, optional
        Learning rate handed to the Adam optimizer. Defaults to 0.0001.

    Returns
    -------
    The compiled Sequential model (mean-absolute-error loss, Adam optimizer).
    """
    # Compare against None explicitly so a caller-supplied layer is never
    # discarded because of how it evaluates in a boolean context.
    if norm is not None:
        normalizer = norm
    else:
        normalizer = layers.BatchNormalization(momentum=0.98, epsilon=.001, trainable=True)

    hidden_layers = [layers.Dense(units, activation='relu') for units in hidden_units]

    model = Sequential([
        normalizer,
        *hidden_layers,
        layers.Dense(output_units)
    ])

    # NOTE(review): the 'accuracy' metric is kept for backward compatibility,
    # but it is paired with a mean-absolute-error loss — confirm it is the
    # metric actually wanted for this target.
    model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam(learning_rate), metrics=['accuracy'])
    print("Model compiled")
    return model

In [24]:
# Build the network; norm=None makes build_dnn fall back to its own
# BatchNormalization layer as the first layer.
model = build_dnn(norm=None)

Model compiled


In [43]:
# One-hot encode the label columns.
# NOTE(review): the displayed result has bare county names as columns (no
# 'Counties_' / 'CountyIds_' prefix), which suggests `labels` was not the
# two-column frame built above when this cell last ran — confirm the input.
labels = pd.get_dummies(labels)

In [44]:
# Display the one-hot encoded label matrix.
labels

Unnamed: 0,Alameda,Alpine,Amador,Butte,Calaveras,Colusa,Contra Costa,Del Norte,El Dorado,Fresno,...,State of Nevada,State of Oregon,Sutter,Tehama,Trinity,Tulare,Tuolumne,Ventura,Yolo,Yuba
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1631,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1632,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1633,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1634,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
# Train the model on the feature table against the one-hot labels.
# NOTE(review): no epochs / batch_size / validation arguments are passed, so
# the Keras defaults apply — confirm that is intended.
# NOTE(review): build_dnn ends in Dense(1) while `labels` has one column per
# county — confirm the model output shape matches the target.
# NOTE(review): no dropna / imputation of the AcresBurned NaNs reported by
# isna() is visible before this call — confirm `features` is NaN-free.
model.fit(features, labels)



<tensorflow.python.keras.callbacks.History at 0x21fcb855fc8>