In [175]:
import sqlite3
import numpy as np
import pandas as pd
from jdcal import gcal2jd, jd2gcal, jcal2jd, MJD_0
import keras
import math
import datetime
import tensorflowjs as tfjs
import pickle
import julian as julian
import datetime
import calendar
import sklearn as metrics
from keras.models import Sequential, model_from_yaml, load_model
from keras.layers import Dense, LSTM, Dropout, Embedding, Masking
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE

In [181]:
def convertJulianDateToDays(y, jd):
    month = 1
    day = 0
    while jd - calendar.monthrange(y,month)[1] > 0 and month <= 12:
        jd = jd - calendar.monthrange(y,month)[1]
        month = month + 1
    return datetime.datetime(y, month, jd)

def convertJulianTimeToCategory(param):
    if (param[0] == None):
        return "n_a";
        
    row = int(param[0])
    if row >= 0 and row < 400:
        return 'early_morning'
    elif row >= 400 and  row < 800:
        return 'mid_morning'
    elif row >= 800 and  row < 1200:
        return 'late_morning'
    elif row >=1200 and  row <1600:
        return 'afternoon'
    elif row >=1600 and  row <2000:
        return 'evening'
    elif row >=2000 and  row <2400:
        return 'night'
    else:
        return 'n_a'

def encodeOutputVariable(y):
    labelencoder_Y_Origin = LabelEncoder()
    y = labelencoder_Y_Origin.fit_transform(y.astype(str))
    return y

def encodeCategoricalData(X, index):
    # encode categorical data
    labelencoder_X_Origin = LabelEncoder()
    X[:, index] = labelencoder_X_Origin.fit_transform(X[:, index].astype(str))
    return X    

def manualEncodeLongStrings(X, column):
    index = 0
    test = 0
    keys = {}
    for row in X:
        key = row[column].replace(", ", "").replace(" ", "")
        if (keys.get(key) == None):
            keys[key] = index
            index += 1
        X[test][column] = keys.get(key)
        test += 1
    return X
    
def defaultMinimumValues(values): 
    for index, x in enumerate(values):
        x = [float(y) if y != None and float(y) > 0.0 else 0 for y in x]
        values[index] = x
    return pd.DataFrame(values)

def convertDateColumns(X, column):
    X = X[:, column].apply(lambda row: julian.from_jd(row, fmt='mmddyyyy'))
    print(X[:, column])
    return X.values

def encodeHotEncoder(X, categoryIndex):
    # meant to create dummy variables for each category data
    # you only use it for one column at a time, output will be the number of columns
    # needed to represent all discrete values of column
    onehotencoder = OneHotEncoder(categorical_features = [categoryIndex])
    X = defaultMinimumValues(X)
    X = onehotencoder.fit_transform(X.astype(str)).toarray()    
    X = X[:, 1:]
    return X

def determineTotalTime(startDay, startTime, endDay, endTime):
    minsPerDay = 24 * 60
    totalStart = (int(startDay) * minsPerDay) + int(startTime)
    totalEnd = (int(endDay) * minsPerDay) + int(endTime)
    return totalEnd - totalStart

def createArtificialNeuralNetwork(X_train, y_train):
    # create ANN

    # initialize the ann
    classifier = Sequential()

    # adding the input layer and the first hidden layer
    classifier.add(Dense(100, kernel_initializer = "uniform", activation = "relu", input_dim = 41))
    
    classifier.add(Dense(90, kernel_initializer = "uniform", activation="relu"))
    
    classifier.add(Dense(75, kernel_initializer = "uniform", activation="relu"))
    
    # adding the second hidden layer
    classifier.add(Dense(50, kernel_initializer = "uniform", activation = "relu"))

    # adding the third hidden layer
    classifier.add(Dense(25, kernel_initializer = "uniform", activation = "relu"))

    classifier.add(Dense(20, kernel_initializer = "uniform", activation="relu"))
    
    # adding the fourth hidden layer
    classifier.add(Dense(10, kernel_initializer = "uniform", activation = "relu"))
    
    classifier.add(Dense(5, kernel_initializer = "uniform", activation="relu"))

    # adding the output layer 
    classifier.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))

    # compiling the ANN
    classifier.compile(optimizer = "adam", loss = "mean_squared_error", metrics = ["accuracy"])

    # fitting the ANN to the training set
    classifier.fit(X_train, y_train, batch_size = 100, epochs = 200)
    
def creatingRandomForestPredictor(X_train, y_train, X_test, y_test):
    print("\nRandom Forest Classifier Section")
    print("---------------------------------")
    
    # initialize the Random Forest Classifier
    random_forest_classifier = RandomForestClassifier(**{'n_jobs': -1,
        'n_estimators': 1500,
        'warm_start': True, 
        'max_features': 0.5,
        'max_depth': 15,
        'min_samples_leaf': 2,
        'max_features' : 'sqrt',
        'random_state' : 0,
        'verbose': 0
                                                      })
    
    oversampler = SMOTE(random_state=0)
    smote_X_train, smote_y_train = oversampler.fit_sample(X_train, y_train)
    
    # fitting Random Forest to the training set
    random_forest_classifier.fit(smote_X_train, smote_y_train)
    
    # Predicting the Test set results
    rf_y_pred = random_forest_classifier.predict(X_test)
    
    # use the threshold of error to determine whether a prediction is valid
    rf_y_pred = (rf_y_pred > 0.5)
    
    # making the confusion matrix
    #cm = metrics.confusion_matrix(y_test.ravel(), rf_y_pred.ravel())
    
    print("Training set Score: ", random_forest_classifier.score(X_train, y_train))
    print("Testing set Score: ", random_forest_classifier.score(X_test, y_test)) 
    
def createNaiveBayesModel(X_train, y_train, X_test, y_test):
    classifier = GaussianNB()
    
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    y_pred = (y_pred > 0.5)
    
    print("Training set Score: ", classifier.score(X_train, y_train))
    print("Testing set Score: ", classifier.score(X_test, y_test))    

In [161]:
sqlite_file = "./wildfires.sqlite"

# connecting to the database file and saving the select
conn = sqlite3.connect(sqlite_file)
dataset = pd.read_sql_query("select * from Fires limit 5000;", conn)

In [162]:
dataset.iloc[:,:]

Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape
0,1,1,FS-1418826,FED,FS-FIRESTAT,FS,USCAPNF,Plumas National Forest,0511,Plumas National Forest,...,A,40.036944,-121.005833,5.0,USFS,CA,63,063,Plumas,b'\x00\x01\xad\x10\x00\x00\xe8d\xc2\x92_@^\xc0...
1,2,2,FS-1418827,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.933056,-120.404444,5.0,USFS,CA,61,061,Placer,b'\x00\x01\xad\x10\x00\x00T\xb6\xeej\xe2\x19^\...
2,3,3,FS-1418835,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.984167,-120.735556,13.0,STATE OR PRIVATE,CA,17,017,El Dorado,b'\x00\x01\xad\x10\x00\x00\xd0\xa5\xa0W\x13/^\...
3,4,4,FS-1418845,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.559167,-119.913333,5.0,USFS,CA,3,003,Alpine,b'\x00\x01\xad\x10\x00\x00\x94\xac\xa3\rt\xfa]...
4,5,5,FS-1418847,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.559167,-119.933056,5.0,USFS,CA,3,003,Alpine,b'\x00\x01\xad\x10\x00\x00@\xe3\xaa.\xb7\xfb]\...
5,6,6,FS-1418849,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.635278,-120.103611,5.0,USFS,CA,5,005,Amador,b'\x00\x01\xad\x10\x00\x00\xf0<~\x90\xa1\x06^\...
6,7,7,FS-1418851,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.688333,-120.153333,5.0,USFS,CA,17,017,El Dorado,b'\x00\x01\xad\x10\x00\x00$o\x996\xd0\t^\xc0h\...
7,8,8,FS-1418854,FED,FS-FIRESTAT,FS,USCASHF,Shasta-Trinity National Forest,0514,Shasta-Trinity National Forest,...,B,40.968056,-122.433889,13.0,STATE OR PRIVATE,CA,,,,b'\x00\x01\xad\x10\x00\x00t)\xe8\xd5\xc4\x9b^\...
8,9,9,FS-1418856,FED,FS-FIRESTAT,FS,USCASHF,Shasta-Trinity National Forest,0514,Shasta-Trinity National Forest,...,B,41.233611,-122.283333,13.0,STATE OR PRIVATE,CA,,,,"b'\x00\x01\xad\x10\x00\x00\xdc\x8d\x1e""""\x92^\..."
9,10,10,FS-1418859,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.548333,-120.149167,5.0,USFS,CA,5,005,Amador,b'\x00\x01\xad\x10\x00\x00dS\\\xf2\x8b\t^\xc0\...


In [163]:
# split dataset into train and test lists
X = dataset.iloc[:, [34, 35, 30, 31, 29, 28, 19, 20, 22, 25, 27]].values
y = dataset.iloc[:, 23].values
X

array([['CA', '63', 40.03694444, ..., '1300', 2453403.5, '1730'],
       ['CA', '61', 38.93305556, ..., '0845', 2453137.5, '1530'],
       ['CA', '17', 38.98416667, ..., '1921', 2453156.5, '2024'],
       ...,
       ['WA', '65', 48.39083333, ..., '1030', 2453583.5, '1500'],
       ['AZ', '13', 33.98638889, ..., '0842', 2453686.5, '1015'],
       ['WA', '65', 48.35694444, ..., '1630', 2453584.5, '1800']],
      dtype=object)

In [164]:
# newDiscoverTime = X.loc[:, "DISCOVERY_TIME": "DISCOVERY_TIME"].apply(convertJulianTimeToCategory, axis=1)
# newContTime = X.loc[:, "CONT_TIME": "CONT_TIME"].apply(convertJulianTimeToCategory, axis=1)
# X.loc[:, "DISCOVERY_TIME": "DISCOVERY_TIME"] = newDiscoverTime
# X.loc[:, "CONT_TIME": "CONT_TIME"] = newContTime
# X = X.values

In [165]:
X = encodeCategoricalData(X, 0)
X = encodeCategoricalData(X, 4)
# X = encodeCategoricalData(X, 7)
# X = encodeCategoricalData(X, 8)
X = encodeHotEncoder(X, 0)
X = encodeHotEncoder(X, 4)
# X = encodeHotEncoder(X, 7)
# X = encodeHotEncoder(X, 8)

In [166]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [176]:
createNaiveBayesModel(X_train, y_train, X_test, y_test)

Training set Score:  0.03
Testing set Score:  0.021333333333333333


In [182]:
createArtificialNeuralNetwork(X_train, y_train)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155

Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [180]:
creatingRandomForestPredictor(X_train, y_train, X_test, y_test)


Random Forest Classifier Section
---------------------------------
Training set Score:  0.8211428571428572
Testing set Score:  0.6326666666666667


In [82]:
# create RNN

# initialize the RNN
classifier = Sequential()

classifier.add(Embedding(input_dim=1,input_length=50, output_dim=1, trainable=False, mask_zero=True))

classifier.add(Masking(mask_value=0.0))

classifier.add(LSTM(64, return_sequences=False, dropout=0.1, recurrent_dropout=0.1))

classifier.add(Dense(64, activation="relu"))

classifier.add(Dropout(0.5))

classifier.add(Dense(1, activation="softmax"))

classifier.compile(optimizer="adam", loss="mean_squared_error", metrics=["accuracy"])

classifier.fit(X_train, y_train, batch_size=100, epochs=20)

Epoch 1/20


InvalidArgumentError: indices[0,3] = 1 is not in [0, 1)
	 [[{{node embedding_16/embedding_lookup}} = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_16/embeddings/read, embedding_16/Cast, embedding_16/embedding_lookup/axis)]]

In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)