### Model Investigation

- Random Forest Regressor (Baseline)
- Gradient Boosting Regressor
- Bagging Regressor
- AdaBoost Regressor
- Stacking Regressor
- Voting Regressor

In [1]:
# Imports

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

from code_files.data_preperation import prepare_for_train
from code_files.train import train

In [2]:
# Load Dataset
# Keep the CSV location in a named variable so the read call stays short.
sales_csv_path = "dataset/eda_amazon_sales_report.csv"
df_amazon = pd.read_csv(sales_csv_path)

# Print the per-column dtype / non-null summary ...
df_amazon.info()

# ... and leave the column index as the last expression so the notebook
# renders it as the cell's output.
df_amazon.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117123 entries, 0 to 117122
Data columns (total 24 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   Unnamed: 0                           117123 non-null  int64  
 1   Size                                 117123 non-null  int64  
 2   Qty                                  117123 non-null  int64  
 3   Amount                               117123 non-null  float64
 4   promotion-ids                        117123 non-null  int64  
 5   B2B                                  117123 non-null  int64  
 6   Status_Cancelled                     117123 non-null  bool   
 7   Status_Shipped                       117123 non-null  bool   
 8   Status_Shipped - Delivered to Buyer  117123 non-null  bool   
 9   Fulfilment_Amazon                    117123 non-null  bool   
 10  Fulfilment_Merchant                  117123 non-null  bool   
 11  ship-service-

Index(['Unnamed: 0', 'Size', 'Qty', 'Amount', 'promotion-ids', 'B2B',
       'Status_Cancelled', 'Status_Shipped',
       'Status_Shipped - Delivered to Buyer', 'Fulfilment_Amazon',
       'Fulfilment_Merchant', 'ship-service-level_Expedited',
       'ship-service-level_Standard', 'Category_Blouse', 'Category_Bottom',
       'Category_Dupatta', 'Category_Ethnic Dress', 'Category_Saree',
       'Category_Set', 'Category_Top', 'Category_Western Dress',
       'Category_kurta', 'Month', 'Day'],
      dtype='object')

In [6]:
# CONSTANTS

# One seed for every stochastic estimator so a Restart & Run All reproduces
# the same metrics (same value the train/dev split uses for its random_state).
SEED = 42


def _base_estimators():
    """Return fresh ``(name, estimator)`` pairs for the voting/stacking ensembles.

    A new list with new estimator instances is built on every call, so the
    two ensembles never share estimator objects.
    """
    return [
        ('Decision Tree', DecisionTreeRegressor(max_depth=10, min_samples_leaf=2, min_samples_split=2, random_state=SEED)),
        # random_state matters for Ridge here because the 'sag' solver is stochastic.
        ('Ridge', Ridge(alpha=0.1, fit_intercept=True, solver='sag', tol=0.01, random_state=SEED)),
        ('SGD', SGDRegressor(alpha=0.0001, eta0=0.001, learning_rate='invscaling', penalty='elasticnet', random_state=SEED)),
    ]


# Candidate ensemble regressors, evaluated in this order.
# VotingRegressor / StackingRegressor take no random_state of their own;
# their randomness comes from the seeded base estimators.
MODELS = [
    RandomForestRegressor(random_state=SEED),
    GradientBoostingRegressor(random_state=SEED),
    AdaBoostRegressor(random_state=SEED),
    BaggingRegressor(random_state=SEED),
    VotingRegressor(estimators=_base_estimators()),
    StackingRegressor(estimators=_base_estimators()),
]

In [7]:
# Hold out 10% of the rows as a dev set; the fixed random_state keeps the
# split identical across runs.
dftrain, dfdev = train_test_split(df_amazon, test_size=0.1, random_state=42)
Xtrain, ytrain, Xdev, ydev = prepare_for_train(dftrain, dfdev)

# Train every candidate and collect one [model, mae, rmse, r2] row per model.
results = []
for model in MODELS:
    model_name = model.__class__.__name__
    print(f"Model: {model_name}")
    _, metrics = train(model, Xtrain, ytrain, Xdev, ydev)
    # Build a new row instead of inserting into the list returned by `train`,
    # so the returned object is not mutated.
    # Assumes `train` returns the metrics in (mae, rmse, r2) order, which the
    # column labels below rely on -- TODO confirm against code_files.train.
    results.append([model_name, *metrics])

# NOTE(review): in the previously saved output of this cell the "r2" column is
# identical to "rmse" for every model, which suggests `train` reports RMSE
# twice instead of R^2 -- verify the metrics computed in code_files.train.
df_results = pd.DataFrame(results, columns = ["model", "mae", "rmse", "r2"])

# Make sure the target directory exists so the (expensive) training run is not
# lost to a "non-existent directory" error at save time.
results_path = Path("dataset/results/five-model-results.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
df_results.to_csv(results_path, index=False)
df_results



Model: RandomForestRegressor
Model: GradientBoostingRegressor
Model: AdaBoostRegressor
Model: BaggingRegressor
Model: VotingRegressor
Model: StackingRegressor


Unnamed: 0,model,mae,rmse,r2
0,RandomForestRegressor,243.157248,317.102649,317.102649
1,GradientBoostingRegressor,211.452304,275.11279,275.11279
2,AdaBoostRegressor,236.646949,292.436718,292.436718
3,BaggingRegressor,249.21899,324.045099,324.045099
4,VotingRegressor,213.244832,278.245898,278.245898
5,StackingRegressor,212.710636,278.346917,278.346917
