**Imports**

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

**Pre-Processing**

In [5]:
# Load the raw wildfire records and preview the first rows.
wildfire_csv = '../data/WildfireData.csv'
data = pd.read_csv(wildfire_csv)
data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,fire_name,fire_size,fire_size_class,stat_cause_descr,latitude,longitude,state,disc_clean_date,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,0,0,,10.0,C,Missing/Undefined,18.105072,-66.753044,PR,2/11/2007,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,1,1,,3.0,B,Arson,35.03833,-87.61,TN,12/11/2006,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,2,2,,60.0,C,Arson,34.9478,-88.7225,MS,2/29/2004,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,3,3,WNA 1,1.0,B,Debris Burning,39.6414,-119.3083,NV,6/6/2005,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,4,4,,2.0,B,Miscellaneous,30.7006,-90.5914,LA,9/22/1999,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [6]:
# Keep the three model inputs plus the target label (stat_cause_descr).
kept_columns = ['fire_size', 'remoteness', 'putout_time', 'stat_cause_descr']
data = data[kept_columns]
data.head()

Unnamed: 0,fire_size,remoteness,putout_time,stat_cause_descr
0,10.0,0.017923,,Missing/Undefined
1,3.0,0.184355,,Arson
2,60.0,0.194544,,Arson
3,1.0,0.487447,0 days 00:00:00.000000000,Debris Burning
4,2.0,0.214633,,Miscellaneous


In [7]:
# Keep only complete rows, then reduce putout_time strings such as
# "0 days 00:00:00.000000000" to their leading day count ("0").
# assign() builds a new frame rather than writing a column into a frame that
# was derived from a column selection, which avoids pandas'
# SettingWithCopyWarning. The value stays a string here; it is cast to float
# when the feature matrix is built.
data = data.dropna().assign(
    putout_time=lambda frame: frame['putout_time'].str.split(' ').str[0]
)
data.head()

Unnamed: 0,fire_size,remoteness,putout_time,stat_cause_descr
3,1.0,0.487447,0,Debris Burning
9,1.0,0.291683,0,Miscellaneous
11,37.0,0.191923,0,Arson
14,3.0,0.123518,0,Missing/Undefined
22,2.0,0.054988,0,Miscellaneous


In [8]:
# List the distinct cause labels that remain after dropping incomplete rows.
data['stat_cause_descr'].unique()

array(['Debris Burning', 'Miscellaneous', 'Arson', 'Missing/Undefined',
       'Lightning', 'Equipment Use', 'Campfire', 'Structure', 'Children',
       'Smoking', 'Powerline', 'Railroad', 'Fireworks'], dtype=object)

In [9]:
# Encode the cause label as an integer class id. Five causes get their own
# id (0-4); every other cause is pooled into the catch-all id 5, giving six
# classes in total.
OTHER_CAUSE = 5
cause_to_class = {
    'Debris Burning': 0,
    'Arson': 1,
    'Lightning': 2,
    'Equipment Use': 3,
    'Campfire': 4,
    'Structure': OTHER_CAUSE,
    'Children': OTHER_CAUSE,
    'Smoking': OTHER_CAUSE,
    'Powerline': OTHER_CAUSE,
    'Railroad': OTHER_CAUSE,
    'Fireworks': OTHER_CAUSE,
    'Missing/Undefined': OTHER_CAUSE,
    'Miscellaneous': OTHER_CAUSE,
}
encoded_cause = data['stat_cause_descr'].map(cause_to_class)
# map() yields NaN for any value missing from the dict. That includes the
# already-encoded integers if this cell is accidentally run twice, so fail
# loudly instead of carrying NaN labels into training.
assert encoded_cause.notna().all(), 'unmapped stat_cause_descr value(s)'
# assign() returns a new frame, avoiding a chained-assignment warning.
data = data.assign(stat_cause_descr=encoded_cause)
data.head()

Unnamed: 0,fire_size,remoteness,putout_time,stat_cause_descr
3,1.0,0.487447,0,0
9,1.0,0.291683,0,5
11,37.0,0.191923,0,1
14,3.0,0.123518,0,5
22,2.0,0.054988,0,5


In [19]:
from sklearn.model_selection import train_test_split

# Fixed seed so the 90/10 split, and every metric computed from it, is the
# same on each Restart & Run All.
SPLIT_SEED = 42

# Features: every column except the target, cast to float (putout_time is
# still a string at this point). Target: the integer-encoded cause.
X = data.drop('stat_cause_descr', axis=1).astype('float')
y = data.stat_cause_descr.astype('float')
print(X.head())
print(y.head())
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SPLIT_SEED
)

    fire_size  remoteness  putout_time
3         1.0    0.487447          0.0
9         1.0    0.291683          0.0
11       37.0    0.191923          0.0
14        3.0    0.123518          0.0
22        2.0    0.054988          0.0
3     0.0
9     5.0
11    1.0
14    5.0
22    5.0
Name: stat_cause_descr, dtype: float64


In [20]:
# (rows, feature columns) of the training split.
X_train.shape

(24729, 3)

In [21]:
# Shape of the training labels before one-hot encoding (one label per row).
y_train.shape

(24729,)

In [22]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the class ids for the softmax / categorical cross-entropy
# network. num_classes is fixed explicitly: without it the width is inferred
# as max(label) + 1 per call, so the train and test matrices could differ in
# width if the highest class id were missing from one split.
# NOTE: this overwrites y_train / y_test in place, so the cell is not
# idempotent and later cells see one-hot matrices rather than class ids.
NUM_CLASSES = 6
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [23]:
# After one-hot encoding the labels are 2-D: one row per sample, one column per class.
y_train.shape

(24729, 6)

**NN Model**

In [24]:
# Small fully-connected classifier: 3 input features -> 32 -> 16 -> 8 hidden
# ReLU units -> 6-way softmax (one output per cause class).
model = Sequential([
    Dense(32, input_dim=3, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(6, activation='softmax'),
])

In [25]:
# One-hot targets pair with categorical cross-entropy; optimise with Adam and
# report accuracy during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [26]:
# Train for 100 epochs in mini-batches of 64. The held-out split is passed as
# validation data, so it is evaluated after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=64,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1b15cd79100>

In [17]:
# test model
# Single hand-written sample used to smoke-test the trained network.
sample_row = {'fire_size': 37.0, 'remoteness': 0.191923, 'putout_time': 0}
test_data = pd.DataFrame([sample_row])
test_data

Unnamed: 0,fire_size,remoteness,putout_time
0,37.0,0.191923,0


In [None]:
# Run the network on the sample frame; each output row holds the six softmax
# scores produced by the final layer.
result = model.predict(test_data)
result

In [None]:
# find class
# The predicted class is the index of the largest score in the first output
# row. np.argmax returns the first such index directly, which replaces the
# manual max scan over a hard-coded range(6) followed by an np.where lookup
# (that version relied on at least one score being greater than 0).
final = int(np.argmax(result[0]))
print(final)
    

In [None]:
# save model
# Writes the trained network to ../Weights/cause.h5, relative to the
# notebook's working directory.
# NOTE(review): presumably the ../Weights directory must already exist -- confirm.
model.save("../Weights/cause.h5")

In [None]:
# add cause to predicted fires dataset
# Load the table of predicted fires that the cause model will be applied to.
predicted_csv = '../data/PredictedData.csv'
predicted_data = pd.read_csv(predicted_csv)

In [None]:
# Drop incomplete rows and reduce putout_time strings to their leading token
# (the same string split applied to the training data). assign() returns a
# new frame instead of writing a column into the dropna() result, which
# avoids pandas' SettingWithCopyWarning.
# NOTE(review): dropna() here removes rows with a missing value in ANY
# column, not only the three model inputs -- confirm that is intended.
predicted_data = predicted_data.dropna().assign(
    putout_time=lambda frame: frame['putout_time'].str.split(' ').str[0]
)
# NOTE: this rebinds test_data (previously the single hand-built sample) to
# the full feature matrix of the predicted fires.
test_data = predicted_data[['fire_size', 'remoteness', 'putout_time']].astype('float')
# One row of six class scores per predicted fire.
results = model.predict(test_data)

**XGBoost Classifier**

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Gradient-boosted tree classifier trained on the same split as the network.
# NOTE(review): `silent` appears to have been superseded by `verbosity` in
# newer XGBoost releases, and `scale_pos_weight` is documented for binary
# problems -- confirm against the installed version.
xgb_model = XGBClassifier(
        silent=False, 
        scale_pos_weight=1,
        learning_rate=0.01,  
        colsample_bytree = 1,
        subsample = 0.8,
        objective='multi:softprob', 
        n_estimators=1000, 
        reg_alpha = 0.3,
        max_depth=5, 
        gamma=1)

# XGBClassifier needs a 1-D vector of class ids. y_train was turned into a
# one-hot matrix for the Keras model, so collapse it back to ids when it is
# 2-D; a label vector that is already 1-D is used unchanged (cast to int).
if np.ndim(y_train) > 1:
    y_train_labels = np.argmax(y_train, axis=1)
else:
    y_train_labels = np.asarray(y_train).astype(int)
xgb_model.fit(X_train, y_train_labels)

In [None]:
# Predict on the held-out features and round every prediction to the nearest
# integer, collecting the results in a plain list.
y_pred = xgb_model.predict(X_test)
predictions = list(map(round, y_pred))

In [None]:
# accuracy_score needs both arguments in the same representation. y_test was
# one-hot encoded for the Keras model while `predictions` holds class ids, so
# collapse y_test back to ids when it is 2-D before comparing.
if np.ndim(y_test) > 1:
    y_test_labels = np.argmax(y_test, axis=1)
else:
    y_test_labels = y_test
accuracy = round(accuracy_score(y_test_labels, predictions), 2)
accuracy

In [None]:
# Predict with the XGBoost model on whatever test_data currently holds and
# show the prediction for its first row.
# NOTE(review): test_data is rebound more than once in this notebook --
# confirm which frame is intended here.
result = xgb_model.predict(test_data)
result[0]

In [None]:
# Show the last five held-out feature rows and their labels. y_test is a
# NumPy array once it has been one-hot encoded and arrays have no .tail(), so
# a [-5:] slice is used; it works for both an array and a Series.
print(X_test.tail())
print(y_test[-5:])

In [None]:
# test model
# Second hand-written sample, used to spot-check the XGBoost classifier.
sample_row1 = {'fire_size': 5.0, 'remoteness': 0.250806, 'putout_time': 2.0}
test_data1 = pd.DataFrame([sample_row1])
test_data1

In [None]:
# Predict the cause class for the single-row sample and display it as an integer.
result1 = xgb_model.predict(test_data1)
result1[0].astype('int')