In [2]:
import pandas as pd
import numpy as np
df = pd.read_csv('/content/forestfires.csv')
df.head()

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,small


In [4]:
# removing redundant data
df_cleaned = df.drop(columns=['monthfeb','monthjan','monthmar','monthapr','monthmay','monthjun','monthjul','monthaug','monthsep',
'monthoct','monthnov','monthdec','daymon','daytue','daywed','daythu','dayfri','daysat','daysun'])
df_cleaned.head()

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,small


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

          # Data Preprocessing

# Split data into features (X) and target variable (y)
X = df_cleaned.drop(columns=['area'])
y = df_cleaned['area']

# Perform one-hot encoding for categorical variables because categorical features need
#encoding and one-hot is a good way because it encode in such a way that variables do not lose their original classes
X = pd.get_dummies(X)

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by making the mean = 0 and standard deviation = 1 so that all features
# have same scale. This will eliminate biasness in model by making influence of each feature equal.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the input data to include the number of timesteps because LSTM is used for sequential data
# so its input layers expect timesteps so we are converting our 1D tabular data to 2D by adding timesteps equal to 1
X_train_reshaped = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# our target variable is skewed meaning it is not distributed well so we need to make it distributed
# by taking natural logarithms also our target variable contains values equal to or near to zero so we need to take logarithm by
# adding 1 to them before because natural logarithm of zero is undefined
y_train_log = np.log1p(y_train)
y_test_log = np.log1p(y_test)

# Define the LSTM model
model = Sequential([
    LSTM(100, input_shape=(1,X_train.shape[1])), # 100 LSTM cells in the layer
    # and batchsize of 1 beacause we are not dealing with sequential data
    Dense(1)       # Output layer with 1 neuron for regression task
])

# Compile the model
model.compile(optimizer='adam', loss='mse')  # Using mean squared error as the loss function

# Train the model
model.fit(X_train_reshaped, y_train_log, epochs=50, batch_size=32, validation_split=0.2)

       # Evaluate the model

# Predict on the test set
y_pred = model.predict(X_test_reshaped)

# Reverse the log transformation on predictions and true labels so it can calculate mean square error correctly
y_pred_inverse = np.expm1(y_pred)
y_test_inverse = np.expm1(y_test_log)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test_inverse, y_pred_inverse)
print("Mean Squared Error (MSE):", mse)

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test_inverse, y_pred_inverse)
print("Mean Absolute Error (MAE):", mae)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Mean Squared Error (MSE): 11192.721711864291
Mean Absolute Error (MAE): 16.998369288199513
