In [3]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# Read the featured dataset CSV into a DataFrame.
FEATURED_DATA_CSV = "../data/processed_data/featured_data.csv"
df = pd.read_csv(FEATURED_DATA_CSV)


In [4]:
# Separate the regression target (y) from the predictor columns (X).
# 'Date' is removed together with the target, so it is not used as a feature.
TARGET_COLUMN = 'Weekly_Sales'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN, 'Date'])



In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
# NOTE(review): train_test_split shuffles rows by default. The data carries a
# 'Date' column, so if the features are time-ordered a chronological split may
# be the more honest evaluation -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both the training and the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# Build the ANN model
# Seed Python, NumPy and TensorFlow before creating any layers so that weight
# initialisation (and the rest of training) is as repeatable as the backend
# allows. The value matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Feed-forward regressor: 128 -> 64 -> 32 ReLU units, 30% dropout after each of
# the first two hidden layers, and a single linear output unit.
# The input width is declared with an explicit Input object instead of passing
# `input_dim` to the first Dense layer, which Keras warns about in Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1),  # Output layer for regression (no activation)
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Configure training: Adam optimiser minimising mean squared error, with mean
# absolute error reported as an additional metric.
model.compile(
    loss='mean_squared_error',
    metrics=['mae'],
    optimizer='adam',
)


In [7]:
# Fit the network and keep the per-epoch training log in `history`.
# NOTE(review): the test split is also passed as validation data, so the val_*
# numbers in the log and the test metrics are computed on the same rows.
EPOCHS = 50
BATCH_SIZE = 64
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test_scaled, y_test),
    verbose=1,
)


Epoch 1/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4ms/step - loss: 193219776.0000 - mae: 6177.6191 - val_loss: 39482308.0000 - val_mae: 2288.5371
Epoch 2/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 8ms/step - loss: 51214184.0000 - mae: 2851.3601 - val_loss: 38332676.0000 - val_mae: 2191.3005
Epoch 3/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 8ms/step - loss: 51565404.0000 - mae: 2800.3198 - val_loss: 36980584.0000 - val_mae: 2136.7517
Epoch 4/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 8ms/step - loss: 50061224.0000 - mae: 2775.9854 - val_loss: 36397464.0000 - val_mae: 2119.7847
Epoch 5/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 7ms/step - loss: 48605496.0000 - mae: 2792.9233 - val_loss: 35920152.0000 - val_mae: 2125.7710
Epoch 6/50
[1m5106/5106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 8ms/step - loss: 51608240.0000 - mae: 2798.8

In [9]:
# Score the network on the held-out split and report MAE, MSE, RMSE and R².
# NOTE(review): the output recorded for this cell (MAE ~16082, R² ~ -0.50)
# disagrees with the val_mae of ~2100 that fit() logged on these same rows.
# Together with the out-of-order execution counts this suggests the model was
# rebuilt after training and scored with fresh weights -- re-run the notebook
# top to bottom (Restart & Run All) to confirm.
y_pred = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target
r2 = r2_score(y_test, y_pred)

print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)


[1m2553/2553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
MAE: 16081.80
MSE: 774536018.52
RMSE: 27830.49
R² Score: -0.5013


In [10]:
# Save the Model and Scaler
# Create the output directory first: without it both writes below fail, and a
# failure here would come only after the full training run has completed.
Path("../models").mkdir(parents=True, exist_ok=True)

model.save("../models/retail_sales_ann_model.h5")

# Persist the fitted scaler next to the model so the same scaling can be
# re-applied to new inputs. joblib.dump returns the list of written file names,
# which the notebook displays as this cell's output.
joblib.dump(scaler, "../models/scaler.pkl")




['../models/scaler.pkl']