In [43]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import GRU, Dense
%matplotlib inline

In [2]:
data = pd.read_csv('Tractor-Sales.csv')

In [3]:
data.head()

Unnamed: 0,Month-Year,Number of Tractor Sold
0,Jan-03,141
1,Feb-03,157
2,Mar-03,185
3,Apr-03,199
4,May-03,203


In [4]:
data.columns = ['Month_Year', 'No_Tractors_Sold']

In [5]:
data['Month_Year'] = pd.to_datetime(data.Month_Year, format='%b-%y')

In [6]:
data.set_index(data.Month_Year, inplace=True)

In [7]:
data.head()

Unnamed: 0_level_0,Month_Year,No_Tractors_Sold
Month_Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2003-01-01,2003-01-01,141
2003-02-01,2003-02-01,157
2003-03-01,2003-03-01,185
2003-04-01,2003-04-01,199
2003-05-01,2003-05-01,203


In [9]:
data.drop(['Month_Year'], axis=1, inplace=True)

In [10]:
data

Unnamed: 0_level_0,No_Tractors_Sold
Month_Year,Unnamed: 1_level_1
2003-01-01,141
2003-02-01,157
2003-03-01,185
2003-04-01,199
2003-05-01,203
...,...
2014-08-01,848
2014-09-01,640
2014-10-01,581
2014-11-01,519


In [21]:
# Data preprocessing
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Function to create sequences for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

# Hyperparameters
seq_length = 10  # Sequence length
n_features = 1  # Number of features (only 'Tractor_Sales')

# Create sequences for LSTM
X, y = create_sequences(scaled_data, seq_length)

In [22]:
X.shape

(134, 10, 1)

In [23]:
y.shape

(134, 1)

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
# Print the shapes of the resulting datasets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (107, 10, 1)
X_test shape: (27, 10, 1)
y_train shape: (107, 1)
y_test shape: (27, 1)


In [27]:
# Build LSTM model
model = Sequential([
    LSTM(units=50, activation='relu',return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [28]:
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)



In [29]:
train_predictions.shape

(107, 1)

In [30]:
test_predictions.shape

(27, 1)

In [31]:
# Inverse scaling
train_predictions = scaler.inverse_transform(train_predictions.reshape(-1, 1)).flatten()
y_train_inv = scaler.inverse_transform(y_train).flatten()
test_predictions = scaler.inverse_transform(test_predictions.reshape(-1, 1)).flatten()
y_test_inv = scaler.inverse_transform(y_test).flatten()

In [32]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Calculate RMSE
train_rmse = np.sqrt(mean_squared_error(y_train_inv, train_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test_inv, test_predictions))

In [33]:
train_rmse

98.87917013468662

In [34]:
test_rmse

79.36259124645488

In [35]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [36]:
# Calculate MAPE for test set
test_mape = mean_absolute_percentage_error(y_test_inv, test_predictions)
print("Test MAPE:", test_mape)

Test MAPE: 18.686244482990976


In [37]:
# Calculate MAPE for train set
test_mape = mean_absolute_percentage_error(y_train_inv, train_predictions)
print("Train MAPE:", test_mape)

Train MAPE: 19.27909461443806


In [38]:
test_predictions

array([599.616  , 408.29373, 512.71405, 223.22765, 326.63208, 359.371  ,
       205.66791, 559.56384, 584.5329 , 575.83875, 291.95724, 340.6328 ,
       291.26266, 338.71436, 450.99896, 260.59277, 471.79987, 215.36166,
       427.77307, 548.21075, 485.28238, 289.89014, 235.04489, 233.96013,
       193.89508, 583.3212 , 297.04282], dtype=float32)

In [39]:
y_test_inv

array([749., 454., 687., 249., 385., 270., 152., 472., 507., 536., 370.,
       330., 239., 266., 389., 305., 386., 197., 374., 470., 360., 215.,
       249., 289., 199., 454., 381.])

In [40]:
data[data['No_Tractors_Sold']==687]

Unnamed: 0_level_0,No_Tractors_Sold
Month_Year,Unnamed: 1_level_1
2012-07-01,687


In [41]:
# GRU

In [44]:
# Define the GRU model
model = Sequential([
    GRU(50, activation='relu', input_shape=(seq_length, 1)),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2)

# Evaluate the model
loss = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', loss)

# Make predictions
predicted_test = model.predict(X_test)
predicted_train=model.predict(X_train)

Epoch 1/100
4/4 - 2s - loss: 0.3086 - 2s/epoch - 459ms/step
Epoch 2/100
4/4 - 0s - loss: 0.2587 - 47ms/epoch - 12ms/step
Epoch 3/100
4/4 - 0s - loss: 0.2163 - 45ms/epoch - 11ms/step
Epoch 4/100
4/4 - 0s - loss: 0.1761 - 44ms/epoch - 11ms/step
Epoch 5/100
4/4 - 0s - loss: 0.1454 - 53ms/epoch - 13ms/step
Epoch 6/100
4/4 - 0s - loss: 0.1164 - 43ms/epoch - 11ms/step
Epoch 7/100
4/4 - 0s - loss: 0.0920 - 51ms/epoch - 13ms/step
Epoch 8/100
4/4 - 0s - loss: 0.0702 - 43ms/epoch - 11ms/step
Epoch 9/100
4/4 - 0s - loss: 0.0542 - 53ms/epoch - 13ms/step
Epoch 10/100
4/4 - 0s - loss: 0.0425 - 43ms/epoch - 11ms/step
Epoch 11/100
4/4 - 0s - loss: 0.0339 - 49ms/epoch - 12ms/step
Epoch 12/100
4/4 - 0s - loss: 0.0309 - 46ms/epoch - 11ms/step
Epoch 13/100
4/4 - 0s - loss: 0.0315 - 42ms/epoch - 10ms/step
Epoch 14/100
4/4 - 0s - loss: 0.0312 - 42ms/epoch - 10ms/step
Epoch 15/100
4/4 - 0s - loss: 0.0295 - 43ms/epoch - 11ms/step
Epoch 16/100
4/4 - 0s - loss: 0.0272 - 41ms/epoch - 10ms/step
Epoch 17/100
4/4 -

In [45]:
# Inverse scaling
train_predictions = scaler.inverse_transform(predicted_train.reshape(-1, 1)).flatten()
y_train_inv = scaler.inverse_transform(y_train).flatten()
test_predictions = scaler.inverse_transform(predicted_test.reshape(-1, 1)).flatten()
y_test_inv = scaler.inverse_transform(y_test).flatten()

In [46]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Calculate RMSE
train_rmse = np.sqrt(mean_squared_error(y_train_inv, train_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test_inv, test_predictions))

In [47]:
train_rmse

61.60027325819361

In [48]:
test_rmse

47.32051696133645

In [49]:
# Calculate MAPE for test set
test_mape = mean_absolute_percentage_error(y_test_inv, test_predictions)
print("Test MAPE:", test_mape)

Test MAPE: 12.297647446121273


In [50]:
# Calculate MAPE for train set
test_mape = mean_absolute_percentage_error(y_train_inv, train_predictions)
print("Train MAPE:", test_mape)

Train MAPE: 11.577977855026045


In [53]:
# gru performs better than LSTM RNN

In [52]:
# in GRU and LSTM we split the data into trainx,trainy,testx,testy but in SARIMA,holt's winter we split into
# only train and test data

In [None]:
#END