In [16]:
import sqlite3
import numpy as np
import pandas as pd
from jdcal import gcal2jd, jd2gcal, jcal2jd, MJD_0
import keras
import math
import datetime
import tensorflowjs as tfjs
import pickle
import julian as julian
import datetime
import calendar
from keras.models import Sequential, model_from_yaml, load_model
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [38]:
def convertJulianDateToDays(y, jd):
    """Convert a year and a 1-based day-of-year into a ``datetime.datetime``.

    Args:
        y: Calendar year (int).
        jd: Day of the year, where 1 is January 1st. Values that ``int()``
            accepts (e.g. ``33``, ``33.0``, ``'33'``) are coerced to int.

    Returns:
        ``datetime.datetime`` at midnight of the requested day.

    Raises:
        ValueError: If ``jd`` is outside ``1..365`` (``1..366`` in leap years).
    """
    day_of_year = int(jd)
    days_in_year = 366 if calendar.isleap(y) else 365
    # Validate up front: the previous month-walking loop only failed for an
    # over-range day by asking ``calendar.monthrange`` for month 13.
    if not 1 <= day_of_year <= days_in_year:
        raise ValueError(
            "day of year %r is out of range for year %r" % (jd, y)
        )
    # Day 1 is January 1st, so the offset from it is day_of_year - 1.
    return datetime.datetime(y, 1, 1) + datetime.timedelta(days=day_of_year - 1)

def convertJulianTimeToCategory(row):
    """Map a clock value in the range 0..2399 to a named part of the day.

    The range is split into six buckets of 400 units each:
    ``early_morning`` [0, 400), ``mid_morning`` [400, 800),
    ``late_morning`` [800, 1200), ``afternoon`` [1200, 1600),
    ``evening`` [1600, 2000) and ``night`` [2000, 2400).

    Args:
        row: The clock value. Numbers are used as-is; numeric strings such as
            ``'0845'`` are converted first.

    Returns:
        The bucket name, or ``'n_a'`` when the value is missing, not numeric,
        or outside [0, 2400).
    """
    labels = (
        'early_morning',
        'mid_morning',
        'late_morning',
        'afternoon',
        'evening',
        'night',
    )
    try:
        clock = float(row)
    except (TypeError, ValueError, OverflowError):
        # None, empty/non-numeric strings and oversized ints have no bucket.
        return 'n_a'
    # NaN fails this comparison as well, so it also ends up as 'n_a'.
    if not 0 <= clock < 2400:
        return 'n_a'
    return labels[int(clock // 400)]

def encodeOutputVariable(y):
    """Label-encode the target values.

    Args:
        y: Array-like exposing ``astype``; its values are cast to ``str``
            before encoding.

    Returns:
        The result of ``LabelEncoder.fit_transform`` on the stringified values.
        The fitted encoder itself is not returned.
    """
    target_encoder = LabelEncoder()
    return target_encoder.fit_transform(y.astype(str))

def encodeCategoricalData(X, index):
    """Label-encode one column of ``X`` in place.

    Args:
        X: Two-dimensional array supporting ``X[:, index]`` reads and writes.
        index: Position of the column to encode.

    Returns:
        The same ``X`` object, with column ``index`` replaced by the labels
        produced by ``LabelEncoder.fit_transform`` on its stringified values.
    """
    column_as_text = X[:, index].astype(str)
    X[:, index] = LabelEncoder().fit_transform(column_as_text)
    return X

def manualEncodeLongStrings(X, column):
    """Replace the strings in one column with integer codes, in place.

    Each string is normalised by removing every ``", "`` and then every
    remaining space. Distinct normalised strings are numbered 0, 1, 2, ... in
    order of first appearance.

    Args:
        X: Indexable sequence of rows (``X[i][column]`` must be assignable).
        column: Position of the string column inside each row.

    Returns:
        The same ``X`` object with the column overwritten by integer codes.
    """
    codes = {}
    for position, record in enumerate(X):
        compact = record[column].replace(", ", "").replace(" ", "")
        # A new key receives the next unused integer; a known key keeps its code.
        X[position][column] = codes.setdefault(compact, len(codes))
    return X
    
def defaultMinimumValues(values):
    """Coerce every cell to a float and replace missing/non-positive cells with 0.

    Each row of ``values`` is overwritten in place with the cleaned row, and
    the container is then wrapped in a ``pandas.DataFrame``.

    Args:
        values: Indexable sequence of rows; every non-``None`` cell must be
            convertible with ``float()``.

    Returns:
        ``pd.DataFrame(values)`` built from the cleaned rows.

    Raises:
        ValueError / TypeError: Propagated from ``float()`` for cells that are
            not numeric.
    """
    def _clamp(cell):
        # Identity check instead of ``!= None``; convert once instead of twice.
        if cell is None:
            return 0
        number = float(cell)
        # NaN fails the comparison and therefore also becomes 0.
        return number if number > 0.0 else 0

    for index, row in enumerate(values):
        values[index] = [_clamp(cell) for cell in row]
    return pd.DataFrame(values)

def convertDateColumns(X, column):
    """Convert one column of Julian Day numbers into calendar timestamps.

    Args:
        X: Two-dimensional array-like (anything ``np.asarray`` accepts).
        column: Position of the column holding Julian Day numbers
            (e.g. ``2453403.5``).

    Returns:
        A one-dimensional NumPy ``datetime64`` array with one entry per row of
        ``X``. Missing values (``None``/NaN) become ``NaT``.

    Raises:
        ValueError: If a value in the column cannot be converted to float.
    """
    # Going through an object array keeps mixed-type rows (strings next to
    # floats) intact while the single date column is pulled out.
    julian_days = pd.Series(np.asarray(X, dtype=object)[:, column]).astype(float)
    # pandas interprets the numbers as days counted from the Julian epoch.
    converted = pd.to_datetime(julian_days, unit='D', origin='julian')
    return converted.values

def encodeHotEncoder(X, categoryIndex):
    """One-hot encode a single categorical column and drop its first dummy column.

    Intended for one column per call; the encoder adds as many columns as the
    column has discrete values.

    Args:
        X: Feature rows; first cleaned with ``defaultMinimumValues``.
        categoryIndex: Position of the column to expand into dummy variables.

    Returns:
        The dense encoded array without its first column.
    """
    # NOTE(review): `categorical_features` is believed to have been removed
    # from OneHotEncoder in newer scikit-learn releases — confirm the pinned version.
    encoder = OneHotEncoder(categorical_features = [categoryIndex])
    cleaned_as_text = defaultMinimumValues(X).astype(str)
    dense = encoder.fit_transform(cleaned_as_text).toarray()
    # Drop the first output column.
    return dense[:, 1:]

def determineTotalTime(startDay, startTime, endDay, endTime):
    """Return the number of minutes between two (day, clock time) pairs.

    Clock times are interpreted as HHMM values (``'0845'`` or ``845`` means
    08:45), matching the four-character time strings used elsewhere in this
    notebook. Previously the HHMM number was added as if it were already a
    minute count, which overstated every duration that crossed an hour.

    Args:
        startDay: Day number of the start (anything ``int()`` accepts).
        startTime: HHMM clock time of the start.
        endDay: Day number of the end.
        endTime: HHMM clock time of the end.

    Returns:
        ``end - start`` in minutes (negative if the end precedes the start).

    Raises:
        ValueError / TypeError: Propagated from ``int()`` for unusable input.
    """
    minsPerDay = 24 * 60

    def _absolute_minutes(day, clock):
        # Split HHMM into hours and minutes before converting to minutes.
        hours, minutes = divmod(int(clock), 100)
        return int(day) * minsPerDay + hours * 60 + minutes

    return _absolute_minutes(endDay, endTime) - _absolute_minutes(startDay, startTime)

In [39]:
# Path to the SQLite database, relative to the notebook's working directory.
sqlite_file = "./wildfires.sqlite"

# Open the database and load the first 50,000 rows of the Fires table into a DataFrame.
# NOTE(review): `conn` is never closed in the visible cells — confirm whether a
# later cell still needs it; otherwise close it once the query has run.
conn = sqlite3.connect(sqlite_file)
dataset = pd.read_sql_query("select * from Fires limit 50000;", conn)

In [40]:
# Preview only the first rows instead of rendering the whole 50,000-row frame.
dataset.head(10)

Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape
0,1,1,FS-1418826,FED,FS-FIRESTAT,FS,USCAPNF,Plumas National Forest,0511,Plumas National Forest,...,A,40.036944,-121.005833,5.0,USFS,CA,63,063,Plumas,b'\x00\x01\xad\x10\x00\x00\xe8d\xc2\x92_@^\xc0...
1,2,2,FS-1418827,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.933056,-120.404444,5.0,USFS,CA,61,061,Placer,b'\x00\x01\xad\x10\x00\x00T\xb6\xeej\xe2\x19^\...
2,3,3,FS-1418835,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.984167,-120.735556,13.0,STATE OR PRIVATE,CA,17,017,El Dorado,b'\x00\x01\xad\x10\x00\x00\xd0\xa5\xa0W\x13/^\...
3,4,4,FS-1418845,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.559167,-119.913333,5.0,USFS,CA,3,003,Alpine,b'\x00\x01\xad\x10\x00\x00\x94\xac\xa3\rt\xfa]...
4,5,5,FS-1418847,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.559167,-119.933056,5.0,USFS,CA,3,003,Alpine,b'\x00\x01\xad\x10\x00\x00@\xe3\xaa.\xb7\xfb]\...
5,6,6,FS-1418849,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.635278,-120.103611,5.0,USFS,CA,5,005,Amador,b'\x00\x01\xad\x10\x00\x00\xf0<~\x90\xa1\x06^\...
6,7,7,FS-1418851,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.688333,-120.153333,5.0,USFS,CA,17,017,El Dorado,b'\x00\x01\xad\x10\x00\x00$o\x996\xd0\t^\xc0h\...
7,8,8,FS-1418854,FED,FS-FIRESTAT,FS,USCASHF,Shasta-Trinity National Forest,0514,Shasta-Trinity National Forest,...,B,40.968056,-122.433889,13.0,STATE OR PRIVATE,CA,,,,b'\x00\x01\xad\x10\x00\x00t)\xe8\xd5\xc4\x9b^\...
8,9,9,FS-1418856,FED,FS-FIRESTAT,FS,USCASHF,Shasta-Trinity National Forest,0514,Shasta-Trinity National Forest,...,B,41.233611,-122.283333,13.0,STATE OR PRIVATE,CA,,,,"b'\x00\x01\xad\x10\x00\x00\xdc\x8d\x1e""""\x92^\..."
9,10,10,FS-1418859,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,0503,Eldorado National Forest,...,A,38.548333,-120.149167,5.0,USFS,CA,5,005,Amador,b'\x00\x01\xad\x10\x00\x00dS\\\xf2\x8b\t^\xc0\...


In [56]:
# Select the model inputs (X) and the prediction target (y) by column position.
# This does not split train/test; that is done by train_test_split.
# NOTE(review): positional indices depend on the table's column order — the
# column names are not visible here; consider selecting by name once confirmed.
X = dataset.iloc[:, [34, 30, 31, 29, 28, 19, 20, 22, 27]].values
y = dataset.iloc[:, 23].values

In [57]:
# Display the selected feature matrix (NumPy abbreviates large arrays).
X   


array([['CA', 40.03694444, -121.00583333, ..., 2453403.5, '1300', '1730'],
       ['CA', 38.93305556, -120.40444444, ..., 2453137.5, '0845', '1530'],
       ['CA', 38.98416667, -120.73555556, ..., 2453156.5, '1921', '2024'],
       ...,
       ['OR', 42.98666667, -120.80833333, ..., 2448848.5, '2200', '0200'],
       ['OR', 43.18833333, -121.20166667, ..., 2448849.5, '0941', '1408'],
       ['OR', 43.40666667, -121.36166667, ..., 2448849.5, '1400', '1638']],
      dtype=object)

In [30]:
# Label-encode columns 0 and 3, then one-hot encode them.
# NOTE(review): encodeHotEncoder returns a matrix whose column layout differs
# from its input, so index 3 in the second one-hot call presumably no longer
# points at the column that was label-encoded above — confirm the intended index.
# NOTE(review): every line rebinds X, so re-running this cell re-encodes data
# that is already encoded; it is only valid on a freshly selected X.
X = encodeCategoricalData(X, 0)
X = encodeCategoricalData(X, 3)
X = encodeHotEncoder(X, 0)
X = encodeHotEncoder(X, 3)

TypeError: unhashable type: 'slice'

In [72]:
# Display the target vector.
y

array([9., 1., 5., ..., 1., 1., 1.])

In [73]:
# Hold out 30% of the rows for testing. A fixed random_state makes the split
# (and everything trained on it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [74]:
# Build and train a feed-forward network that predicts the class label in y.

# Derive the layer sizes from the data instead of hard-coding them, so the
# cell keeps working when the encoding step changes the number of columns.
n_features = X_train.shape[1]
# The labels are used directly as class indices, so the output layer needs
# max(label) + 1 units (an index that never occurs is simply never predicted).
n_classes = int(np.max(y)) + 1

# initialize the ann
classifier = Sequential()

# input layer and first hidden layer
classifier.add(Dense(100, kernel_initializer = "uniform", activation = "relu", input_dim = n_features))

# second hidden layer
classifier.add(Dense(50, kernel_initializer = "uniform", activation = "relu"))

# third hidden layer
classifier.add(Dense(25, kernel_initializer = "uniform", activation = "relu"))

# fourth hidden layer
classifier.add(Dense(10, kernel_initializer = "uniform", activation = "relu"))

# output layer: one softmax unit per class. A single sigmoid unit can only
# emit values in (0, 1) and therefore cannot represent labels such as 5 or 9.
classifier.add(Dense(n_classes, kernel_initializer = "uniform", activation = "softmax"))

# compile with a classification loss that accepts integer class labels
# (mean squared error against one sigmoid output is not meaningful here)
classifier.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

# fit the ANN to the training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

KeyboardInterrupt: 