In [None]:
! pip install kaggle
from google.colab import drive
drive.mount('/content/drive')
! mkdir ~/.kaggle
! cp /content/drive/MyDrive/cs231n/assignments/finalproject/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c walmart-recruiting-store-sales-forecasting
! unzip walmart-recruiting-store-sales-forecasting.zip
!unzip features.csv.zip
!unzip train.csv.zip
!unzip test.csv.zip
!unzip sampleSubmission.csv.zip

In [None]:
!pip install neuralforecast

In [3]:
import pandas as pd

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
features = pd.read_csv("features.csv")
stores = pd.read_csv("stores.csv")

In [4]:
train['Date'] = pd.to_datetime(train.Date)

In [5]:
SPLIT_DATE = pd.Timestamp('2011-11-30')

# Split the dataframe based on date
train_df = train[train["Date"] < SPLIT_DATE]
val_df = train[train["Date"] >= SPLIT_DATE]

# Separate features and target
X_train = train_df.drop(columns=["Weekly_Sales"])
X_val = val_df.drop(columns=["Weekly_Sales"])

Y_train = train_df["Weekly_Sales"]
Y_val = val_df["Weekly_Sales"]

In [6]:
import numpy as np

# WMAE Metric Definition
def wmae(y_true, y_pred, is_holiday):
    """
    Weighted Mean Absolute Error.
    Weights = 5 if is_holiday else 1
    """
    weights = np.where(is_holiday == 1, 5, 1)
    return np.sum(weights * np.abs(y_true - y_pred)) / np.sum(weights)

In [7]:
import torch
import pandas as pd
from neuralforecast.models import NBEATS
from neuralforecast import NeuralForecast
from sklearn.base import BaseEstimator, TransformerMixin

class N_Beats(NBEATS):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.optimizer = torch.optim.AdamW(self.parameters(), lr=1e-3)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.9)

    def set_optim(self, optimizer):
        self.optimizer = optimizer

    def set_scheduler(self, scheduler):
        self.scheduler = scheduler

    def configure_optimizers(self):
        return {
            'optimizer': self.optimizer,
            'lr_scheduler': {
                'scheduler': self.scheduler,
                'interval': 'epoch',
                'frequency': 1
            }
        }

class Preprocessor(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        x_copy = X.copy()
        if 'y' not in x_copy.columns:
            x_copy['y'] = 0
        x_copy['Date'] = pd.to_datetime(x_copy['Date'])
        return x_copy

class NBEATSModel(BaseEstimator, TransformerMixin):
    def __init__(self, models: list[NBEATS] = None):
        if models is None or len(models) == 0:
            device = 'gpu' if torch.cuda.is_available() else 'cpu'
            models = [N_Beats(
                input_size=15,
                h=48,
                max_steps=1500,
                batch_size=64,
                stack_types=['identity', 'trend', 'seasonality'],
                n_blocks=[1, 1, 1],
                random_seed=42,
                accelerator=device,
                devices=1,
                logger=True,
                enable_progress_bar=False,
                enable_model_summary=False,
            )]
        self.nf = NeuralForecast(models=models, freq='W-FRI')

    def fit(self, X, y=None):
        df = self.create_nbeats(X, y)
        df.sort_values(by=['ds'], inplace=True)
        self.nf.fit(df)
        return self

    def transform(self, X):
        preds = self.nf.predict()
        real = self.create_nbeats(X, X['y'])
        merged = pd.merge(real, preds, on=['unique_id', 'ds'], how='left')
        merged.fillna(0, inplace=True)
        return merged

    def predict(self, X=None):
        return self.transform(X)

    def create_nbeats(self, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
        df = pd.DataFrame()
        df["ds"] = pd.to_datetime(x["Date"])
        df["Store"] = x["Store"]
        df["Dept"] = x["Dept"]
        df["IsHoliday"] = x["IsHoliday"].astype(int)
        df["unique_id"] = df["Dept"].astype(str) + "_" + df["Store"].astype(str)
        df["y"] = y.values
        return df[["unique_id", "ds", "y", "IsHoliday"]].copy()

In [None]:
import wandb
wandb.login()

# Gamma, input size and forecast horizon search

In [None]:
import wandb
from sklearn.pipeline import Pipeline

gamma = [0.9, 0.8, 0.7]
h = [24, 48]
input_size = [20, 30, 40]

for lr in gamma:
    for h_ in h:
        for input_size_ in input_size:
            # Initialize wandb run
            wandb.init(
                project="walmart-forecasting",
                name="NBeats",
                config={
                    "learning_rate_gamma": lr,
                    "h": h_,
                    "input_size": input_size_,
                    "max_steps": 1500,
                    "batch_size": 64
                },
                reinit=True
            )

            estm = N_Beats(
                input_size=input_size_,
                h=h_,
                max_steps= 1500,
                batch_size=64,
                stack_types=(['identity', 'trend', 'seasonality']),
                n_blocks=[1, 1, 1],
                random_seed=42,
                devices=1,
                logger=False,
                enable_progress_bar=False,
                enable_model_summary=False,
            )

            estm.set_scheduler(
                torch.optim.lr_scheduler.StepLR(estm.optimizer, step_size=10, gamma=lr)
            )
            estimators = [estm]

            pipeline = Pipeline([
                ('preprocess', Preprocessor()),
                ('model', NBEATSModel(estimators))
            ])

            model = pipeline.fit(X_train, Y_train)

            # Log final training and validation loss
            try:
                wandb.log({
                    "train_loss": estm.loss_history['train'][-1],
                    "val_loss": estm.loss_history['val'][-1]
                })
            except:
                print("Could not log train/val loss")

            X_val['y'] = Y_val
            preds = model.predict(X_val)

            score = wmae(preds['y'], preds['N_Beats'], preds['IsHoliday'])

            # Log wmae score
            wandb.log({
                "wmae": score
            })

            print(f'learning rate = {lr}, input_size = {input_size_}, h = {h_}, wmae = {score}')

            wandb.finish()

# Stack types and block number search

In [None]:
from sklearn.pipeline import Pipeline
import torch

# Fixed hyperparameters
learning_rate_gamma = 0.7
h = 48
input_size = 40

# Grid values for stack types and blocks
stack_types_list = [
    ['identity'],
    ['trend'],
    ['identity', 'trend'],
    ['identity', 'trend', 'seasonality']
]
n_blocks_list = [
    [1],
    [2],
    [2, 2],
    [1, 1, 1]
]

for stack_types_ in stack_types_list:
    for n_blocks_ in n_blocks_list:
        if len(stack_types_) != len(n_blocks_):
            continue  # skip invalid combinations

        print(f"\nRunning config: stack_types={stack_types_}, n_blocks={n_blocks_}")

        estm = N_Beats(
            input_size=input_size,
            h=h,
            max_steps=1500,
            batch_size=64,
            stack_types=stack_types_,
            n_blocks=n_blocks_,
            random_seed=42,
            devices=1,
            logger=False,
            enable_progress_bar=False,
            enable_model_summary=False,
        )

        estm.set_scheduler(
            torch.optim.lr_scheduler.StepLR(estm.optimizer, step_size=10, gamma=learning_rate_gamma)
        )

        estimators = [estm]

        pipeline = Pipeline([
            ('preprocess', Preprocessor()),
            ('model', NBEATSModel(estimators))
        ])

        try:
            model = pipeline.fit(X_train, Y_train)

            X_val['y'] = Y_val
            preds = model.predict(X_val)

            score = wmae(preds['y'], preds['N_Beats'], preds['IsHoliday'])

            print(f"WMAE: {score:.5f}")
        except Exception as e:
            print(f"Error with config {stack_types_} / {n_blocks_}: {e}")

# Best batch size search

In [None]:
import wandb
from sklearn.pipeline import Pipeline

batch_sizes = [64, 128, 256]

for batch in batch_sizes:
    try:
        wandb.init(
            project="walmart-forecasting",
            name=f"NBeats-batch{batch}",
            config={
                "learning_rate_gamma": 0.7,
                "h": 48,
                "input_size": 40,
                "max_steps": 1500,
                "batch_size": batch
            },
            reinit=True
        )

        estm = N_Beats(
            input_size=40,
            h=48,
            max_steps=1500,
            batch_size=batch,
            stack_types=(['identity', 'trend', 'seasonality']),
            n_blocks=[1, 1, 1],
            random_seed=42,
            devices=1,
            logger=False,
            enable_progress_bar=False,
            enable_model_summary=False,
        )

        estm.set_scheduler(torch.optim.lr_scheduler.StepLR(estm.optimizer, step_size=10, gamma=0.7))
        estimators = [estm]

        pipeline = Pipeline([
            ('preprocess', Preprocessor()),
            ('model', NBEATSModel(estimators))
        ])

        # Fit model
        model = pipeline.fit(X_train, Y_train)

        # Log train/val loss if available
        try:
            wandb.log({
                "train_loss": estm.loss_history['train'][-1],
                "val_loss": estm.loss_history['val'][-1]
            })
        except Exception as e:
            print(f"[WARN] Could not log losses: {e}")

        # Predict and score
        X_val['y'] = Y_val
        preds = model.predict(X_val)

        score = wmae(preds['y'], preds['N_Beats'], preds['IsHoliday'])

        # Log final WMAE
        wandb.log({"wmae": score})
        print(f'[BATCH={batch}] WMAE: {score:.4f}')

    finally:
        wandb.finish()

INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1500` reached.


[WARN] Could not log losses: 'N_Beats' object has no attribute 'loss_history'


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


[BATCH=64] WMAE: 2116.8844


0,1
wmae,▁

0,1
wmae,2116.88437


INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1500` reached.


[WARN] Could not log losses: 'N_Beats' object has no attribute 'loss_history'


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


[BATCH=128] WMAE: 2092.8477


0,1
wmae,▁

0,1
wmae,2092.84773


INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1500` reached.


[WARN] Could not log losses: 'N_Beats' object has no attribute 'loss_history'


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


[BATCH=256] WMAE: 2087.4574


0,1
wmae,▁

0,1
wmae,2087.45741


In [None]:
test.head()

Unnamed: 0,Store,Dept,Date,IsHoliday
0,1,1,2012-11-02,False
1,1,1,2012-11-09,False
2,1,1,2012-11-16,False
3,1,1,2012-11-23,True
4,1,1,2012-11-30,False


In [None]:
test['Date'] = pd.to_datetime(test['Date'])
preds = pipeline.predict(test)
preds = preds[['Id', 'N_Beats']]

submission = pd.merge(test[['Id']], preds, on='Id', how='left')

submission.to_csv("submission_batch256.csv", index=False)
print("✅ Final submission saved as 'submission_batch256.csv'")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


✅ Final submission saved as 'submission_batch256.csv'
