In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder,PowerTransformer, StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

import pickle
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
file_path = "Outlier_removed.csv"
df = pd.read_csv(file_path)
df['Classes']=df['Classes'].apply(lambda x :1 if x == 'fire' else 0)

In [3]:
X = df.drop(columns=['Classes','year'],axis=1)
y= df['Classes']

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)

(182, 12) (182,) (61, 12) (61,)


## LogisticRegression

In [5]:
#Using LogisticRegression
lr = LogisticRegression()  
pipe = Pipeline([
    ('lr', lr)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9344262295081968

In [6]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[26  1]
 [ 3 31]]
              precision    recall  f1-score   support

           0       0.90      0.96      0.93        27
           1       0.97      0.91      0.94        34

    accuracy                           0.93        61
   macro avg       0.93      0.94      0.93        61
weighted avg       0.94      0.93      0.93        61



## Support Vector

In [7]:
#Using Support Vector
svc = SVC()  
pipe = Pipeline([
    ('svc', svc)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.8524590163934426

In [8]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[23  4]
 [ 5 29]]
              precision    recall  f1-score   support

           0       0.82      0.85      0.84        27
           1       0.88      0.85      0.87        34

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61



## GaussianNB

In [9]:
#Using GaussianNB
gaussian = GaussianNB()
pipe = Pipeline([
    ('nb', gaussian)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9016393442622951

In [10]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[25  2]
 [ 4 30]]
              precision    recall  f1-score   support

           0       0.86      0.93      0.89        27
           1       0.94      0.88      0.91        34

    accuracy                           0.90        61
   macro avg       0.90      0.90      0.90        61
weighted avg       0.90      0.90      0.90        61



## KNN Neighbors

In [12]:
#Using KNN Neighbors
knn = KNeighborsClassifier()  
pipe = Pipeline([
    ('svc', knn)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.8688524590163934

In [13]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[22  5]
 [ 3 31]]
              precision    recall  f1-score   support

           0       0.88      0.81      0.85        27
           1       0.86      0.91      0.89        34

    accuracy                           0.87        61
   macro avg       0.87      0.86      0.87        61
weighted avg       0.87      0.87      0.87        61



## Decision Tree

In [14]:
#Using Decision Tree
tree = DecisionTreeClassifier()
pipe = Pipeline([
    ('tree', tree)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9836065573770492

In [15]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[27  0]
 [ 1 33]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        27
           1       1.00      0.97      0.99        34

    accuracy                           0.98        61
   macro avg       0.98      0.99      0.98        61
weighted avg       0.98      0.98      0.98        61



## Random Forest Tree

In [16]:
#Using Random Forest Tree
rf = RandomForestClassifier()
pipe = Pipeline([
    ('rf', rf)
    ])
pipe.fit(X_train.values, y_train.values)
y_pred = pipe.predict(X_test.values)
accuracy_score(y_test, y_pred)

0.9836065573770492

In [17]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[27  0]
 [ 1 33]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        27
           1       1.00      0.97      0.99        34

    accuracy                           0.98        61
   macro avg       0.98      0.99      0.98        61
weighted avg       0.98      0.98      0.98        61



In [22]:
results = pd.DataFrame({
    'Model': ['Logistic Regression','Support Vector Machines', 
              'Naive Bayes','KNN' ,'Decision Tree','Random Forest'],
    'Score': [0.93,0.85,0.90,0.86,0.98,0.98]})

result_df = results.sort_values(by='Score', ascending=False)
result_df = result_df.set_index('Score')
result_df.head(9)

Unnamed: 0_level_0,Model
Score,Unnamed: 1_level_1
0.98,Decision Tree
0.98,Random Forest
0.93,Logistic Regression
0.9,Naive Bayes
0.86,KNN
0.85,Support Vector Machines


In [23]:
### Creating pickle file
pickle.dump(pipe,open('../pipe_class2.pkl','wb'))

In [25]:
pickle_model = pickle.load(open('pipe_class2.pkl','rb'))

In [27]:
dict_test ={'day':1, 'month':6, 'Temperature':26,'RH':57, 'Ws':18.0, 'Rain':0.00, 'FFMC':65.7000,
       'DMC':3.4, 'DC':7.6, 'ISI':1.3, 'BUI':3.4, 'FWI':0.5}

In [28]:
list(dict_test.values())

[1, 6, 26, 57, 18.0, 0.0, 65.7, 3.4, 7.6, 1.3, 3.4, 0.5]

In [29]:
input = np.array(list(dict_test.values())).reshape(1,12)

In [32]:
pickle_model.predict(input)[0]

0