# #Prediction for forest fires

In [5]:
#importing library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.neural_network import MLPRegressor,MLPClassifier
import warnings

In [6]:
#loading the data
df=pd.read_csv('../input/forest-fires-data-set/forestfires.csv')
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [8]:
#checking the null  values
df.isna().sum()

X        0
Y        0
month    0
day      0
FFMC     0
DMC      0
DC       0
ISI      0
temp     0
RH       0
wind     0
rain     0
area     0
dtype: int64

# #Preprocessing the data

In [9]:
def ordinal_encode(df,column,ordering):
    df=df.copy()
    df[column]=df[column].apply(lambda x:ordering.index(x))
    return df

In [24]:
#creating a function 
def preprocess_inputs(df,task):
    df=df.copy()
    #ordinal encode and month and day column
    df=ordinal_encode(df,column='month',ordering=['jan',
                                                  'feb','mar','apr','may', 'jun', 'jul', 'aug','sep','oct','nov', 'dec'])
    #converting day column to numerical 
    df=ordinal_encode(df,column='day',ordering=['sun','mon','tue', 'wed','thu','fri',  'sat'])
    if task=='regression':
        y=df['area']
    elif task=='classification':
        y=df['area'].apply(lambda x:1 if x>0 else 0)
    x=df.drop('area',axis=1)
    x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.7,shuffle=True,random_state=1)
    #scaling feature data
    scaler=StandardScaler()
    scaler.fit(x_train)
    x_train=pd.DataFrame(scaler.transform(x_train),columns=x.columns)
    x_test= pd.DataFrame(scaler.transform(x_test),columns=x.columns)
    return x_train,x_test,y_train,y_test

In [38]:
#Regression Model
linear=LinearRegression()
linear.fit(x_train,y_train)
print('Linear Regression{}'.format(linear.score(x_test,y_test)))


Linear Regression0.021557347582511044


In [37]:
#callng the function
x_train,x_test,y_train,y_test=preprocess_inputs(df,task='regression')
x_train

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain
0,0.609497,0.549280,0.653317,-0.970161,0.060985,0.279029,0.572473,-0.406018,0.401972,-0.078036,-0.989687,-0.093528
1,-1.116216,-1.839756,-1.924250,-1.442804,-0.195757,-0.895350,-1.761748,0.137308,-2.243125,0.993349,1.363841,-0.093528
2,-0.253360,0.549280,0.223722,0.447767,-0.179711,2.460689,1.039077,-0.698578,-0.250931,0.678236,-0.473059,-0.093528
3,-0.253360,-0.247066,-1.924250,-0.497519,-0.388314,-1.295139,-1.899971,-1.074727,-0.803388,-0.015014,-0.702671,-0.093528
4,0.178069,-0.247066,0.223722,0.447767,0.686796,1.776676,0.620811,1.035885,0.234561,-0.078036,-0.702671,-0.093528
...,...,...,...,...,...,...,...,...,...,...,...,...
356,-1.116216,0.549280,1.082911,-1.442804,0.333774,-0.970311,0.593645,-0.029870,-0.585754,-0.519195,-1.735927,-0.093528
357,-1.116216,0.549280,0.223722,-0.024876,0.799121,-0.136376,-0.119444,0.889605,0.820500,-0.708263,0.847213,-0.093528
358,0.178069,-0.247066,-1.924250,0.920409,0.189357,-1.176452,-1.860422,0.011925,-0.552272,-1.149422,1.363841,-0.093528
359,1.472353,1.345625,0.223722,-1.442804,0.141217,0.527336,0.232507,0.346279,0.117373,-0.141059,1.076825,-0.093528


In [39]:
#neutral regression model MPLRegressor
nn_reg_model=MLPRegressor(hidden_layer_sizes=(16,16))
nn_reg_model.fit(x_train,y_train)
print('MLPRegression {}'.format(nn_reg_model.score(x_test,y_test)))

MLPRegression 0.049291732927311926




In [12]:
#checking unique value in month column
list(df['day'].unique())

['fri', 'tue', 'sat', 'sun', 'mon', 'wed', 'thu']

without scaling the data accuracy 


In [34]:
#Simply Classification model LogisticReggression
lin_clf_model=LogisticRegression()
lin_clf_model.fit(x_train,y_train)
lin_clf_model.score(x_test,y_test)

0.5064102564102564

In [35]:
#Neural Classification model MLPClassifier
nn_clf_model=MLPClassifier(hidden_layer_sizes=(16,16))
nn_clf_model.fit(x_train,y_train)
nn_clf_model.score(x_test,y_test)



0.5