<a href="https://colab.research.google.com/github/wrymp/Final-Project-Walmart-Recruiting---Store-Sales-Forecasting/blob/main/model_experiment_NBEATS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# N-BEATS Implementation for Walmart Sales Forecasting

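This notebook implements N-BEATS (Neural Basis Expansion Analysis for Time Series) for Walmart weekly sales forecasting. It follows the same staged pipeline structure as the fraud detection experiments: exploration, cleaning, feature selection, and cross-validation, each tracked as its own Weights & Biases run.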

In [1]:
from google.colab import drive
!pip install wandb -q
!pip install kaggle -q

drive.mount('/content/drive')
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# ! kaggle competitions download -c walmart-recruiting-store-sales-forecasting
# ! unzip /content/walmart-recruiting-store-sales-forecasting.zip
# ! unzip /content/train.csv.zip
# ! unzip /content/test.csv.zip
# ! unzip /content/features.csv.zip
# ! unzip /content/sampleSubmission.csv.zip

# Setup & Imports

In [3]:
import pandas as pd
import numpy as np
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import warnings
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
import gc
import os
import pickle
import cloudpickle

# Silence library warnings and select the seaborn-flavoured matplotlib style.
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8')

# Fixed seed (42) for NumPy and PyTorch, plus CUDA when a GPU is present.
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

# Resolve the compute device once; later cells refer to `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Using device: {device}")

Libraries imported successfully!
PyTorch version: 2.6.0+cu124
CUDA available: True
Using device: cuda


# Wandb Initialization

In [4]:
# Authenticate with Weights & Biases, then open the initial tracking run.
wandb.login()
try:
    wandb.init(
        project="walmart-sales-forecasting",
        name="NBEATS_Initial_Setup",
        config=dict(
            model_type="NBEATS",
            framework="PyTorch",
            device=str(device),
            random_seed=42,
        ),
    )
except Exception as e:
    # Best-effort: a failed init is reported but does not abort the cell.
    print(f"⚠️ Wandb initialization failed: {e}")
    print("Continuing without wandb logging...")
else:
    print("✓ Wandb initialized successfully!")

[34m[1mwandb[0m: Currently logged in as: [33mqitiashvili13[0m ([33mdshan21-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✓ Wandb initialized successfully!


# Data Loading

In [8]:
# Load the four Walmart CSV files from Google Drive and log basic dataset facts.
print("Loading Walmart datasets...")

# Single place to edit when the data lives somewhere else.
DATA_DIR = '/content/drive/MyDrive/walmart-recruiting-store-sales-forecasting'

try:
    # train/test/features were unzipped into folders named after the archive,
    # hence the repeated '<name>.csv/<name>.csv' path segments.
    train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv', 'train.csv'))
    test_df = pd.read_csv(os.path.join(DATA_DIR, 'test.csv', 'test.csv'))
    stores_df = pd.read_csv(os.path.join(DATA_DIR, 'stores.csv'))
    features_df = pd.read_csv(os.path.join(DATA_DIR, 'features.csv', 'features.csv'))
except FileNotFoundError as e:
    print(f"Error loading data: {e}")
    # Point at the directory that is actually read from.
    print(f"Please ensure data files are in '{DATA_DIR}'")
    raise

print(f"✓ Train data shape: {train_df.shape}")
print(f"✓ Test data shape: {test_df.shape}")
print(f"✓ Stores data shape: {stores_df.shape}")
print(f"✓ Features data shape: {features_df.shape}")

# Log basic dataset info (kept outside the try so that only file-loading
# problems produce the "Error loading data" message).
wandb.log({
    "train_samples": len(train_df),
    "test_samples": len(test_df),
    "num_stores": stores_df['Store'].nunique(),
    "num_departments": train_df['Dept'].nunique(),
    "date_range_train": f"{train_df['Date'].min()} to {train_df['Date'].max()}"
})

Loading Walmart datasets...
✓ Train data shape: (421570, 5)
✓ Test data shape: (115064, 4)
✓ Stores data shape: (45, 3)
✓ Features data shape: (8190, 12)


# Data Exploration Run

In [9]:
# Close the previous wandb run and open a dedicated exploration run.
wandb.finish()
wandb.init(project="walmart-sales-forecasting",
           name="NBEATS_Exploration",
           config={"stage": "exploration"})

print("\n=== DATA EXPLORATION ===")

# Parse the Date column of every frame that has one.
train_df['Date'] = pd.to_datetime(train_df['Date'])
test_df['Date'] = pd.to_datetime(test_df['Date'])
features_df['Date'] = pd.to_datetime(features_df['Date'])

# Quantities that are both printed and logged below.
weekly_sales = train_df['Weekly_Sales']
date_min, date_max = train_df['Date'].min(), train_df['Date'].max()

# Basic statistics
print("\nTrain Data Info:")
print(f"Date range: {date_min} to {date_max}")
print(f"Unique stores: {train_df['Store'].nunique()}")
print(f"Unique departments: {train_df['Dept'].nunique()}")
print(f"Total store-dept combinations: {train_df.groupby(['Store', 'Dept']).ngroups}")

# Sales statistics
print(f"\nSales Statistics:")
print(f"Mean weekly sales: ${weekly_sales.mean():,.2f}")
print(f"Median weekly sales: ${weekly_sales.median():,.2f}")
print(f"Min weekly sales: ${weekly_sales.min():,.2f}")
print(f"Max weekly sales: ${weekly_sales.max():,.2f}")

# Mean and row count of weekly sales, split by the holiday flag.
holiday_sales = train_df.groupby('IsHoliday')['Weekly_Sales'].agg(['mean', 'count'])
print(f"\nHoliday Impact:")
print(holiday_sales)

# Number of stores per store type.
store_types = stores_df['Type'].value_counts()
print(f"\nStore Types:")
print(store_types)

# Percentage of missing entries per features_df column (only non-zero ones shown).
print(f"\nMissing Values in Features:")
missing_pct = (features_df.isnull().sum() / len(features_df)) * 100
print(missing_pct[missing_pct > 0].sort_values(ascending=False))

# Log exploration metrics; the five MarkDown entries are generated in order 1..5.
wandb.log({
    "unique_stores": train_df['Store'].nunique(),
    "unique_departments": train_df['Dept'].nunique(),
    "total_timeseries": train_df.groupby(['Store', 'Dept']).ngroups,
    "avg_weekly_sales": weekly_sales.mean(),
    "median_weekly_sales": weekly_sales.median(),
    "sales_std": weekly_sales.std(),
    "holiday_sales_boost": holiday_sales.loc[True, 'mean'] / holiday_sales.loc[False, 'mean'],
    **{f"missing_markdown{k}_pct": missing_pct[f"MarkDown{k}"] for k in range(1, 6)}
})

print("\n✓ Exploration completed and logged to wandb")

0,1
num_departments,▁▁
num_stores,▁▁
test_samples,▁▁
train_samples,▁▁

0,1
date_range_train,2010-02-05 to 2012-1...
num_departments,81
num_stores,45
test_samples,115064
train_samples,421570



=== DATA EXPLORATION ===

Train Data Info:
Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
Unique stores: 45
Unique departments: 81
Total store-dept combinations: 3331

Sales Statistics:
Mean weekly sales: $15,981.26
Median weekly sales: $7,612.03
Min weekly sales: $-4,988.94
Max weekly sales: $693,099.36

Holiday Impact:
                   mean   count
IsHoliday                      
False      15901.445069  391909
True       17035.823187   29661

Store Types:
Type
A    22
B    17
C     6
Name: count, dtype: int64

Missing Values in Features:
MarkDown2       64.334554
MarkDown4       57.704518
MarkDown3       55.885226
MarkDown1       50.769231
MarkDown5       50.549451
CPI              7.142857
Unemployment     7.142857
dtype: float64

✓ Exploration completed and logged to wandb


# Custom Transformers for Time-Series Pipeline

In [46]:
class TimeSeriesDataProcessor(BaseEstimator, TransformerMixin):
    """Turn the merged Walmart frame into sliding-window samples for N-BEATS.

    Every (Store, Dept) pair learned in ``fit`` is treated as one time series.
    ``transform`` slides a window of ``lookback_window`` rows over each series
    (sorted by ``Date``) and pairs it with the following ``forecast_horizon``
    ``Weekly_Sales`` values.

    Parameters
    ----------
    lookback_window : int, default 52
        Number of consecutive rows used as model input (52 weeks = 1 year).
    forecast_horizon : int, default 1
        Number of rows after the window used as the target.
    """

    # Optional exogenous columns; those present in X are appended after
    # Weekly_Sales, in this order.
    EXTERNAL_FEATURE_COLUMNS = ('Temperature', 'Fuel_Price', 'CPI', 'Unemployment')

    def __init__(self, lookback_window=52, forecast_horizon=1):
        self.lookback_window = lookback_window  # 52 weeks = 1 year
        self.forecast_horizon = forecast_horizon
        self.store_dept_combinations = None
        self.date_range = None

    def fit(self, X, y=None):
        """Learn the store-department combinations and the sorted unique dates.

        X must contain the columns ``Store``, ``Dept`` and ``Date``.
        Returns ``self``.
        """
        self.store_dept_combinations = X.groupby(['Store', 'Dept']).size().index.tolist()
        self.date_range = sorted(X['Date'].unique())
        print(f"Found {len(self.store_dept_combinations)} store-dept combinations")
        # Guard the indexing so an empty frame does not raise IndexError here.
        if self.date_range:
            print(f"Date range: {self.date_range[0]} to {self.date_range[-1]}")
        return self

    def transform(self, X):
        """Build aligned (sequence, target, metadata) samples.

        Returns a dict with:
          * ``'sequences'``: object array of ``(lookback_window, 1 + n_external)``
            matrices; column 0 is ``Weekly_Sales``.
          * ``'targets'``: object array of ``(forecast_horizon,)`` sales vectors.
          * ``'metadata'``: list of dicts (store, dept, start/end/forecast date).

        The three collections always have the same length: a window is dropped
        as a whole when either its input sales or its target contain NaN/inf.
        Series shorter than ``lookback_window + forecast_horizon`` are skipped.
        """
        sequences = []
        targets = []
        metadata = []

        window_span = self.lookback_window + self.forecast_horizon
        feature_columns = [col for col in self.EXTERNAL_FEATURE_COLUMNS if col in X.columns]

        # Split the frame once instead of boolean-masking all rows for every
        # store-dept combination.
        series_by_key = {key: frame for key, frame in X.groupby(['Store', 'Dept'], sort=False)}

        for store, dept in self.store_dept_combinations:
            series_data = series_by_key.get((store, dept))
            if series_data is None or len(series_data) < window_span:
                continue  # Skip if not enough data

            series_data = series_data.sort_values('Date')
            sales = series_data['Weekly_Sales'].to_numpy()
            dates = series_data['Date']
            feature_series = [series_data[col] for col in feature_columns]

            # Create sliding windows
            for i in range(len(series_data) - window_span + 1):
                split = i + self.lookback_window
                stop = split + self.forecast_horizon

                sales_sequence = sales[i:split]
                target_sales = sales[split:stop]

                # Validate input AND target before appending anything, so that
                # sequences, targets and metadata stay index-aligned.
                if not (np.isfinite(sales_sequence).all() and np.isfinite(target_sales).all()):
                    continue

                # Gaps in external features are filled inside the window only:
                # forward, then backward, then 0 if the whole window is missing.
                columns = [sales_sequence]
                for series in feature_series:
                    columns.append(series.iloc[i:split].ffill().bfill().fillna(0).to_numpy())

                # Column 0 is sales; with no external columns this is (lookback, 1).
                sequences.append(np.column_stack(columns))
                targets.append(target_sales)
                metadata.append({
                    'store': store,
                    'dept': dept,
                    'start_date': dates.iloc[i],
                    'end_date': dates.iloc[split - 1],
                    'forecast_date': dates.iloc[split] if self.forecast_horizon > 0 else None
                })

        print(f"Generated {len(sequences)} valid sequences from {len(self.store_dept_combinations)} store-dept combinations")

        return {
            'sequences': np.array(sequences, dtype=object),
            'targets': np.array(targets, dtype=object),
            'metadata': metadata
        }

class FeatureMerger(BaseEstimator, TransformerMixin):
    """Left-join the main sales frame with the stores and features tables.

    The auxiliary tables are handed to ``fit`` as keyword arguments and copied;
    ``transform`` then joins them onto any frame carrying the key columns.
    Non-key columns that exist on both sides of a join receive the pandas
    default suffixes (``_x`` for the main frame, ``_y`` for the auxiliary one).
    """

    def __init__(self):
        self.stores_data = None
        self.features_data = None

    def fit(self, X, y=None, stores_df=None, features_df=None):
        """Keep private copies of the auxiliary frames (``None`` stays ``None``)."""
        self.stores_data = None if stores_df is None else stores_df.copy()
        self.features_data = None if features_df is None else features_df.copy()
        return self

    def transform(self, X):
        """Return a copy of X joined with stores (on Store) and features (on Store, Date)."""
        merged = X.copy()
        join_plan = (
            (self.stores_data, 'Store'),
            (self.features_data, ['Store', 'Date']),
        )
        for auxiliary, keys in join_plan:
            if auxiliary is None:
                continue  # that table was not supplied to fit()
            merged = merged.merge(auxiliary, on=keys, how='left')
        return merged

class MissingValueHandler(BaseEstimator, TransformerMixin):
    """Impute missing values with per-column constants learned in ``fit``.

    * Columns whose name contains ``'MarkDown'`` are filled with ``0.0``
      (no markdown).
    * Any other numeric column that has missing values in the fitting frame is
      filled with that column's median.
    """

    def __init__(self):
        self.fill_values = {}

    def fit(self, X, y=None):
        """Learn fill values from X and return ``self``.

        The mapping is rebuilt from scratch on every call, so refitting on new
        data never keeps medians or columns from an earlier fit.
        """
        # For MarkDown columns, fill with 0 (no markdown)
        fill_values = {col: 0.0 for col in X.columns if 'MarkDown' in col}

        # For other numerical columns, use median
        for col in X.select_dtypes(include=[np.number]).columns:
            if col not in fill_values and X[col].isnull().any():
                fill_values[col] = X[col].median()

        self.fill_values = fill_values
        return self

    def transform(self, X):
        """Return a copy of X with the learned fill values applied.

        Columns that were learned in ``fit`` but are absent from X are ignored.
        """
        result = X.copy()
        for col, fill_value in self.fill_values.items():
            if col in result.columns:
                result[col] = result[col].fillna(fill_value)
        return result

print("✓ Custom transformers defined")

✓ Custom transformers defined


# N-BEATS Model Implementation

In [48]:
class NBeatsBlock(nn.Module):
    """One N-BEATS block: a ReLU MLP trunk followed by two linear heads.

    The trunk maps ``input_size`` -> ``layer_size`` and then applies
    ``num_layers - 1`` further ``layer_size`` -> ``layer_size`` layers.
    Both heads emit ``theta_size`` coefficients: ``theta_b`` for the backcast
    and ``theta_f`` for the forecast. ``basis_size`` is stored but not used
    inside this class.
    """

    def __init__(self, input_size, theta_size, basis_size, num_layers=4, layer_size=512):
        super().__init__()
        self.input_size = input_size
        self.theta_size = theta_size
        self.basis_size = basis_size

        # Input width of each trunk layer; every trunk layer outputs layer_size.
        trunk_inputs = [input_size] + [layer_size] * (num_layers - 1)
        self.fc_layers = nn.ModuleList([nn.Linear(width, layer_size) for width in trunk_inputs])

        self.relu = nn.ReLU()

        # Coefficient heads
        self.theta_b = nn.Linear(layer_size, theta_size)  # Backcast coefficients
        self.theta_f = nn.Linear(layer_size, theta_size)  # Forecast coefficients

        print(f"NBeatsBlock initialized - input_size: {input_size}, layer_size: {layer_size}, theta_size: {theta_size}")
        print(f"NBeatsBlock theta_b weight shape (in __init__): {self.theta_b.weight.shape}") # Debug print
        print(f"NBeatsBlock theta_f weight shape (in __init__): {self.theta_f.weight.shape}") # Debug print

    def forward(self, x):
        """Return ``(theta_b, theta_f)`` for input ``x`` whose last dim is ``input_size``."""
        hidden = x
        for layer in self.fc_layers:
            hidden = self.relu(layer(hidden))
        return self.theta_b(hidden), self.theta_f(hidden)

class GenericBasis(nn.Module):
    """Generic (identity) basis: the coefficients are used directly as values.

    The backcast is the first ``backcast_size`` columns of ``theta_b`` and the
    forecast is the first ``forecast_size`` columns of ``theta_f``.
    """

    def __init__(self, backcast_size, forecast_size):
        super().__init__()
        self.backcast_size = backcast_size
        self.forecast_size = forecast_size

    def forward(self, theta_b, theta_f):
        """Return ``(backcast, forecast)`` as leading-column slices of the thetas."""
        return theta_b[:, :self.backcast_size], theta_f[:, :self.forecast_size]

class NBEATSModel(nn.Module):
    """N-BEATS forecaster: ``num_stacks`` stacks of ``num_blocks_per_stack`` blocks.

    Each block sees the current residual of the input series, emits a backcast
    that is subtracted from the residual, and a partial forecast that is added
    to the running total. Every block uses ``max(input_size, forecast_size)``
    theta coefficients and a shared ``GenericBasis``.

    Note: ``feature_projection`` is created when ``num_features > 1`` but
    ``forward`` does not apply it; only feature column 0 is fed to the blocks.
    """

    def __init__(self, input_size, forecast_size, num_stacks=2, num_blocks_per_stack=3,
                 num_layers=4, layer_size=512, num_features=1):
        super().__init__()
        self.input_size = input_size
        self.forecast_size = forecast_size
        self.num_features = num_features

        # Only instantiated for multivariate input; None marks the univariate case.
        self.feature_projection = nn.Linear(num_features, 1) if num_features > 1 else None

        theta_dim = max(input_size, forecast_size)
        self.stacks = nn.ModuleList([
            nn.ModuleList([
                NBeatsBlock(
                    input_size=input_size,
                    theta_size=theta_dim,
                    basis_size=theta_dim,
                    num_layers=num_layers,
                    layer_size=layer_size,
                )
                for _ in range(num_blocks_per_stack)
            ])
            for _ in range(num_stacks)
        ])

        # Shared basis turning theta coefficients into backcast / forecast values.
        self.basis = GenericBasis(input_size, forecast_size)

    def forward(self, x):
        """Forecast from ``x`` of shape (batch_size, sequence_length, num_features).

        Returns a tensor of shape (batch_size, forecast_size).
        """
        n_samples = x.size(0)

        if self.feature_projection is None:
            series = x.squeeze(-1)  # drop the singleton feature axis
        else:
            series = x[:, :, 0]  # first feature column only

        residual = series
        forecast = torch.zeros(n_samples, self.forecast_size, device=series.device)

        for block in (blk for stack in self.stacks for blk in stack):
            theta_b, theta_f = block(residual)
            backcast, block_forecast = self.basis(theta_b, theta_f)

            # Clip to the expected widths (leaves already-matching tensors unchanged).
            backcast = backcast[:, :residual.size(1)]
            block_forecast = block_forecast[:, :self.forecast_size]

            residual = residual - backcast
            forecast = forecast + block_forecast

        return forecast

class WalmartDataset(Dataset):
    """Index-aligned (sequence, target) pairs served as float32 CPU tensors.

    ``device`` is stored on the instance but not used here; batches are moved
    to the target device by the training loop.
    """

    def __init__(self, sequences, targets, device='cpu'):
        self.sequences, self.targets = sequences, targets
        self.device = device  # kept for reference only

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        # Tensors are created on CPU; device transfer happens in the training loop.
        return torch.FloatTensor(self.sequences[idx]), torch.FloatTensor(self.targets[idx])

print("✓ N-BEATS model architecture defined")

✓ N-BEATS model architecture defined


# Data Cleaning Run

In [49]:
# Close the previous wandb run and open a dedicated cleaning run.
wandb.finish()
wandb.init(project="walmart-sales-forecasting",
           name="NBEATS_Cleaning",
           config={"stage": "cleaning"})

print("\n=== DATA CLEANING ===")

# The merger receives the auxiliary tables at fit time; fit() returns the merger itself.
feature_merger = FeatureMerger().fit(train_df, stores_df=stores_df, features_df=features_df)
missing_handler = MissingValueHandler()

# Join train rows with store attributes and the per-store, per-date features.
print("Merging train data with stores and features...")
train_merged = feature_merger.transform(train_df)
print(f"Train data shape after merging: {train_merged.shape}")

# Learn the fill values on the merged frame and apply them to the same frame.
print("Handling missing values...")
train_cleaned = missing_handler.fit(train_merged).transform(train_merged)

# Per-column null counts, restricted to columns that still contain nulls.
null_counts = train_cleaned.isnull().sum()
remaining_missing = null_counts[null_counts > 0]

print(f"\nRemaining missing values after cleaning:")
print(remaining_missing if len(remaining_missing) > 0 else "No missing values remaining!")

# Basic data quality checks
print(f"\nData quality checks:")
print(f"Total records: {len(train_cleaned):,}")
print(f"Date range: {train_cleaned['Date'].min()} to {train_cleaned['Date'].max()}")
print(f"Unique store-dept combinations: {train_cleaned.groupby(['Store', 'Dept']).ngroups:,}")

# Negative weekly sales are counted and reported, not removed.
negative_sales = (train_cleaned['Weekly_Sales'] < 0).sum()
negative_pct = negative_sales/len(train_cleaned)*100
print(f"Records with negative sales: {negative_sales:,} ({negative_pct:.2f}%)")

# Log cleaning metrics ("remaining_missing_values" is the number of columns that still hold nulls).
wandb.log({
    "cleaned_records": len(train_cleaned),
    "remaining_missing_values": len(remaining_missing),
    "negative_sales_count": int(negative_sales),
    "negative_sales_pct": float(negative_pct),
    "store_dept_combinations": train_cleaned.groupby(['Store', 'Dept']).ngroups
})

print("\n✓ Data cleaning completed and logged to wandb")

# Preview of the cleaned frame used by the following cells.
print("\nSample of cleaned data:")
print(train_cleaned.head())
print(f"\nColumns: {list(train_cleaned.columns)}")


=== DATA CLEANING ===
Merging train data with stores and features...
Train data shape after merging: (421570, 17)
Handling missing values...

Remaining missing values after cleaning:
No missing values remaining!

Data quality checks:
Total records: 421,570
Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
Unique store-dept combinations: 3,331
Records with negative sales: 1,285 (0.30%)

✓ Data cleaning completed and logged to wandb

Sample of cleaned data:
   Store  Dept       Date  Weekly_Sales  IsHoliday_x Type    Size  \
0      1     1 2010-02-05      24924.50        False    A  151315   
1      1     1 2010-02-12      46039.49         True    A  151315   
2      1     1 2010-02-19      41595.55        False    A  151315   
3      1     1 2010-02-26      19403.54        False    A  151315   
4      1     1 2010-03-05      21827.90        False    A  151315   

   Temperature  Fuel_Price  MarkDown1  MarkDown2  MarkDown3  MarkDown4  \
0        42.31       2.572        0.0        

# Feature Selection Run

In [50]:
# Start new wandb run for feature selection
wandb.finish()
wandb.init(
    project="walmart-sales-forecasting",
    name="NBEATS_Feature_Selection",
    config={"stage": "feature_selection"}
)

print("\n=== FEATURE SELECTION ===")

# For N-BEATS the target is always Weekly_Sales; this cell only decides which
# external columns are kept alongside it.

# Feature categories. The merge with features_df leaves two holiday flags
# (IsHoliday_x / IsHoliday_y); the _x column is the one coming from train_df.
core_features = ['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday_x']
store_features = ['Type', 'Size']
economic_features = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
markdown_features = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']

print(f"Available columns: {list(train_cleaned.columns)}")

# feature name -> Pearson correlation with Weekly_Sales
correlation_analysis = {}

# Economic features correlation
for feature in economic_features:
    if feature in train_cleaned.columns:
        corr = train_cleaned['Weekly_Sales'].corr(train_cleaned[feature])
        correlation_analysis[feature] = corr
        print(f"Correlation between Weekly_Sales and {feature}: {corr:.4f}")

# Markdown features correlation, restricted to the period where markdowns exist.
# NOTE(review): MissingValueHandler has already replaced missing MarkDown values
# with 0.0 in train_cleaned, so the dropna below removes nothing and the
# correlation includes those zero-filled rows. Confirm whether that is intended.
markdown_period = train_cleaned[train_cleaned['Date'] >= '2011-11-01']  # MarkDowns available after Nov 2011
for feature in markdown_features:
    if feature in markdown_period.columns:
        valid_data = markdown_period.dropna(subset=[feature])
        if len(valid_data) > 0:
            corr = valid_data['Weekly_Sales'].corr(valid_data[feature])
            correlation_analysis[feature] = corr
            print(f"Correlation between Weekly_Sales and {feature}: {corr:.4f} (from {len(valid_data)} records)")

# Holiday impact analysis: relative uplift of mean sales in holiday weeks.
holiday_impact = train_cleaned.groupby('IsHoliday_x')['Weekly_Sales'].mean()
holiday_boost = holiday_impact[True] / holiday_impact[False] - 1
print(f"\nHoliday sales boost: {holiday_boost:.2%}")

# Store type impact
if 'Type' in train_cleaned.columns:
    store_type_sales = train_cleaned.groupby('Type')['Weekly_Sales'].mean()
    print(f"\nAverage sales by store type:")
    print(store_type_sales)

# Select features based on correlation and business logic
selected_features = core_features.copy()

# Keep every analysed feature whose |correlation| exceeds the threshold.
# All keys of correlation_analysis were taken from train_cleaned's columns above,
# so no further column check is needed. NaN correlations fail the comparison.
for feature, corr in correlation_analysis.items():
    if abs(corr) > 0.01:  # Threshold for significance
        selected_features.append(feature)
        print(f"Selected {feature} (correlation: {corr:.4f})")

# Always include store features
for feature in store_features:
    if feature in train_cleaned.columns:
        selected_features.append(feature)

# Remove duplicates while keeping first-seen order. list(set(...)) would give a
# different column order from one kernel session to the next.
selected_features = list(dict.fromkeys(selected_features))

print(f"\nFinal selected features ({len(selected_features)}): {selected_features}")

# Create feature-selected dataset
train_selected = train_cleaned[selected_features].copy()

print(f"\nFeature-selected data shape: {train_selected.shape}")

# Log feature selection metrics (NaN correlations are left out of the log).
wandb.log({
    "total_available_features": len(train_cleaned.columns),
    "selected_features_count": len(selected_features),
    "holiday_sales_boost": float(holiday_boost),
    "selected_features": selected_features,
    **{f"corr_{k}": v for k, v in correlation_analysis.items() if not np.isnan(v)}
})

print("\n✓ Feature selection completed and logged to wandb")

0,1
cleaned_records,▁
negative_sales_count,▁
negative_sales_pct,▁
remaining_missing_values,▁
store_dept_combinations,▁

0,1
cleaned_records,421570.0
negative_sales_count,1285.0
negative_sales_pct,0.30481
remaining_missing_values,0.0
store_dept_combinations,3331.0



=== FEATURE SELECTION ===
Available columns: ['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday_x', 'Type', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment', 'IsHoliday_y']
Correlation between Weekly_Sales and Temperature: -0.0023
Correlation between Weekly_Sales and Fuel_Price: -0.0001
Correlation between Weekly_Sales and CPI: -0.0209
Correlation between Weekly_Sales and Unemployment: -0.0259
Correlation between Weekly_Sales and MarkDown1: 0.0848 (from 154386 records)
Correlation between Weekly_Sales and MarkDown2: 0.0318 (from 154386 records)
Correlation between Weekly_Sales and MarkDown3: 0.0615 (from 154386 records)
Correlation between Weekly_Sales and MarkDown4: 0.0608 (from 154386 records)
Correlation between Weekly_Sales and MarkDown5: 0.0888 (from 154386 records)

Holiday sales boost: 7.13%

Average sales by store type:
Type
A    20099.568043
B    12237.075977
C     9519.532538
Name: Weekly_Sales, dty

# Cross Validation Run

In [51]:
# Start new wandb run for cross validation
# (closes the previous run first; window sizes are recorded in the run config)
wandb.finish()
wandb.init(
    project="walmart-sales-forecasting",
    name="NBEATS_Cross_Validation",
    config={
        "stage": "cross_validation",
        "lookback_window": 52,
        "forecast_horizon": 1,
        "model_type": "NBEATS"
    }
)

print("\n=== CROSS VALIDATION ===")

# Create time-series data processor
# The 52 / 1 literals here should stay in sync with the values logged in the config above.
ts_processor = TimeSeriesDataProcessor(lookback_window=52, forecast_horizon=1)

# Fit and transform the data
# fit() learns the store-dept combinations; transform() builds the sliding windows.
print("Processing time-series data...")
ts_processor.fit(train_selected)
processed_data = ts_processor.transform(train_selected)

# Unpack the processor output.
# NOTE: `sequences` and `targets` are rebound further down in this cell
# (to padded float32 arrays, and later to the batch variables of the training loop).
sequences = processed_data['sequences']
targets = processed_data['targets']
metadata = processed_data['metadata']

# Quick sanity output; the shape examples fall back to a message when nothing was generated.
print(f"Generated {len(sequences)} sequences")
print(f"Sequence shape example: {sequences[0].shape if len(sequences) > 0 else 'No sequences'}")
print(f"Target shape example: {targets[0].shape if len(targets) > 0 else 'No targets'}")

if len(sequences) == 0:
    print("❌ No sequences generated. Check data processing.")
    wandb.log({"sequences_generated": 0, "processing_failed": True})
else:
    # Convert to consistent numpy arrays and ensure float type
    # Find the maximum number of features
    max_features = max([seq.shape[1] if len(seq.shape) > 1 else 1 for seq in sequences])
    lookback_length = sequences[0].shape[0]

    # Pad sequences to have consistent feature count and convert to float32
    padded_sequences = []
    valid_targets = []

    for i, (seq, tgt) in enumerate(zip(sequences, targets)):
        if len(seq.shape) == 1:
            seq = seq.reshape(-1, 1)

        # Pad features if necessary
        if seq.shape[1] < max_features:
            padding = np.zeros((seq.shape[0], max_features - seq.shape[1]), dtype=np.float32)
            seq = np.column_stack([seq, padding]).astype(np.float32)
        else:
             seq = seq.astype(np.float32) # Ensure consistent float type

        padded_sequences.append(seq)
        valid_targets.append(tgt.astype(np.float32)) # Ensure target is also float32


    sequences = np.array(padded_sequences, dtype=np.float32) # Explicitly set dtype to float32
    targets = np.array(valid_targets, dtype=np.float32) # Explicitly set dtype to float32


    print(f"Processed sequences shape: {sequences.shape}")
    print(f"Processed targets shape: {targets.shape}")

    # Time-based train-validation split (last 20% of data for validation)
    split_idx = int(0.8 * len(sequences))

    X_train_cv = sequences[:split_idx]
    y_train_cv = targets[:split_idx]
    X_val_cv = sequences[split_idx:]
    y_val_cv = targets[split_idx:]

    print(f"\nTrain-Validation Split:")
    print(f"Training sequences: {len(X_train_cv)}")
    print(f"Validation sequences: {len(X_val_cv)}")

    # Create datasets and dataloaders
    train_dataset = WalmartDataset(X_train_cv, y_train_cv)
    val_dataset = WalmartDataset(X_val_cv, y_val_cv)

    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # N-BEATS hyper-parameters; input width and feature count come from the
    # processed sequences, the rest are fixed for this experiment.
    model_config = {
        "input_size": lookback_length,
        "forecast_size": 1,
        "num_stacks": 2,
        "num_blocks_per_stack": 3,
        "num_layers": 4,
        "layer_size": 256,  # Smaller for faster training
        "num_features": max_features
    }

    # Build the network on the target device, then its loss and optimizer
    model = NBEATSModel(**model_config)
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print(f"\nModel initialized with config: {model_config}")
    print(f"Total parameters: {sum([weight.numel() for weight in model.parameters()]):,}")

    # Safe MAPE calculation (defined once, rather than re-defined every epoch)
    def safe_mape(y_true, y_pred):
        """Mean absolute percentage error computed over non-zero targets only.

        Args:
            y_true: 1-D numpy array of ground-truth values.
            y_pred: 1-D numpy array of predictions, aligned with ``y_true``.

        Returns:
            MAPE in percent, or ``float('inf')`` when every target is zero
            (the percentage error is undefined in that case).
        """
        mask = y_true != 0
        if mask.sum() == 0:
            return float('inf')
        return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

    # Training loop
    num_epochs = 5  # Limited epochs for CV
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0

        # Mini-batches get their own names: reusing `sequences` / `targets`
        # as loop variables would overwrite the full dataset arrays of the
        # same name that this cell builds before the split.
        for batch_idx, (batch_sequences, batch_targets) in enumerate(train_loader):
            batch_sequences = batch_sequences.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            outputs = model(batch_sequences)
            loss = criterion(outputs, batch_targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            if batch_idx % 50 == 0:
                print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}, Loss: {loss.item():.4f}')

        # Mean of the per-batch losses for this epoch
        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0.0
        all_predictions = []
        all_targets = []

        with torch.no_grad():
            for batch_sequences, batch_targets in val_loader:
                batch_sequences = batch_sequences.to(device)
                batch_targets = batch_targets.to(device)

                outputs = model(batch_sequences)
                loss = criterion(outputs, batch_targets)
                val_loss += loss.item()

                all_predictions.extend(outputs.cpu().numpy())
                all_targets.extend(batch_targets.cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        # Calculate metrics on the flattened validation predictions
        all_predictions = np.array(all_predictions).flatten()
        all_targets = np.array(all_targets).flatten()

        val_mae = mean_absolute_error(all_targets, all_predictions)
        val_rmse = np.sqrt(mean_squared_error(all_targets, all_predictions))
        val_r2 = r2_score(all_targets, all_predictions)
        val_mape = safe_mape(all_targets, all_predictions)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'  Train Loss: {avg_train_loss:.4f}')
        print(f'  Val Loss: {avg_val_loss:.4f}')
        print(f'  Val MAE: {val_mae:.2f}')
        print(f'  Val RMSE: {val_rmse:.2f}')
        print(f'  Val MAPE: {val_mape:.2f}%')
        print(f'  Val R²: {val_r2:.4f}')

        # Log to wandb (an undefined MAPE is reported as 0.0)
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_mae": val_mae,
            "val_rmse": val_rmse,
            "val_mape": val_mape if not np.isinf(val_mape) else 0.0,
            "val_r2": val_r2
        })

    # Final CV results: last-epoch losses and validation metrics plus dataset sizes
    final_metrics = {
        "cv_final_train_loss": train_losses[-1],
        "cv_final_val_loss": val_losses[-1],
        "cv_final_val_mae": val_mae,
        "cv_final_val_rmse": val_rmse,
        "cv_final_val_mape": val_mape if not np.isinf(val_mape) else 0.0,
        "cv_final_val_r2": val_r2,
        # Total = train + validation split sizes. Counting from the split
        # arrays keeps this correct even if the name `sequences` has been
        # rebound to a mini-batch by a batch loop.
        "sequences_generated": len(X_train_cv) + len(X_val_cv),
        "train_sequences": len(X_train_cv),
        "val_sequences": len(X_val_cv)
    }

    wandb.log(final_metrics)

    print("\n✓ Cross validation completed and logged to wandb")
    print(f"Final validation metrics: MAE={val_mae:.2f}, RMSE={val_rmse:.2f}, MAPE={val_mape:.2f}%, R²={val_r2:.4f}")

0,1
corr_CPI,▁
corr_Fuel_Price,▁
corr_MarkDown1,▁
corr_MarkDown2,▁
corr_MarkDown3,▁
corr_MarkDown4,▁
corr_MarkDown5,▁
corr_Temperature,▁
corr_Unemployment,▁
holiday_sales_boost,▁

0,1
corr_CPI,-0.02092
corr_Fuel_Price,-0.00012
corr_MarkDown1,0.08478
corr_MarkDown2,0.03182
corr_MarkDown3,0.06146
corr_MarkDown4,0.06075
corr_MarkDown5,0.0888
corr_Temperature,-0.00231
corr_Unemployment,-0.02586
holiday_sales_boost,0.07134



=== CROSS VALIDATION ===
Processing time-series data...
Found 3331 store-dept combinations
Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
Generated 261083 valid sequences from 3331 store-dept combinations
Generated 261083 sequences
Sequence shape example: (52, 3)
Target shape example: (1,)
Processed sequences shape: (261083, 52, 3)
Processed targets shape: (261083, 1)

Train-Validation Split:
Training sequences: 208866
Validation sequences: 52217
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock theta_f weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock theta_f weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in _

# Final Training & Model Registry

In [None]:
# Close whatever run is still open, then open a dedicated run for final training
wandb.finish()
wandb.init(
    project="walmart-sales-forecasting",
    name="NBEATS_Final_Training",
    config={
        "stage": "final_training",
        "model_config": model_config,
        "num_epochs": 20,
        "batch_size": 32
    },
)

print("\n=== FINAL TRAINING ===")

# Re-process the data so this cell works from freshly built numpy arrays,
# independent of whatever earlier cells left in their variables.
print("Re-processing data for final training...")
ts_processor_final = TimeSeriesDataProcessor(lookback_window=52, forecast_horizon=1)
ts_processor_final.fit(train_selected)
processed_data_final = ts_processor_final.transform(train_selected)

# Pull the three outputs of the processor out of its result dict
sequences_final, targets_final, metadata_final = (
    processed_data_final['sequences'],
    processed_data_final['targets'],
    processed_data_final['metadata'],
)

print(f"Re-generated {len(sequences_final)} sequences for final training")

if len(sequences_final) == 0:
    # Nothing to train on: report the failure to the console and to the run
    print("❌ No sequences generated for final training. Check data processing.")
    wandb.log({"final_training_failed": True, "reason": "no_sequences"})
else:
    # Widest feature dimension across all sequences (1-D sequences count as
    # one feature); the window length is read off the first sequence.
    max_features = max(seq.shape[1] if seq.ndim > 1 else 1 for seq in sequences_final)
    lookback_length = sequences_final[0].shape[0]

    padded_sequences_final = []
    valid_targets_final = []

    for seq, tgt in zip(sequences_final, targets_final):
        # Promote a 1-D series to a single-column 2-D array
        if seq.ndim == 1:
            seq = seq.reshape(-1, 1)

        # Right-pad with zero columns up to the common feature width
        missing_cols = max_features - seq.shape[1]
        if missing_cols > 0:
            zero_block = np.zeros((seq.shape[0], missing_cols), dtype=np.float32)
            seq = np.concatenate([seq, zero_block], axis=1)

        padded_sequences_final.append(seq.astype(np.float32))
        valid_targets_final.append(tgt.astype(np.float32))

    # Dense float32 numpy arrays (kept on the CPU; batches are moved to the
    # device during training)
    sequences_final_np = np.array(padded_sequences_final, dtype=np.float32)
    targets_final_np = np.array(valid_targets_final, dtype=np.float32)

    print(f"Final training data shape: {sequences_final_np.shape}")
    print(f"Final training targets shape: {targets_final_np.shape}")

    # Create dataset with CPU numpy arrays
    final_dataset = WalmartDataset(sequences_final_np, targets_final_np)
    final_loader = DataLoader(final_dataset, batch_size=32, shuffle=True)

    # Initialize fresh model for final training (same architecture as the CV model)
    final_model = NBEATSModel(**model_config).to(device)
    final_optimizer = optim.Adam(final_model.parameters(), lr=0.001)
    final_criterion = nn.MSELoss()

    print(f"Training on {len(sequences_final_np)} sequences...")

    # Extended training
    num_epochs_final = 20
    best_loss = float('inf')

    for epoch in range(num_epochs_final):
        final_model.train()
        epoch_loss = 0.0

        for batch_idx, (sequences_batch, targets_batch) in enumerate(final_loader):
            sequences_batch = sequences_batch.to(device)
            targets_batch = targets_batch.to(device)

            final_optimizer.zero_grad()
            outputs = final_model(sequences_batch)
            loss = final_criterion(outputs, targets_batch)
            loss.backward()
            final_optimizer.step()

            epoch_loss += loss.item()

        # Mean of the per-batch losses for this epoch
        avg_epoch_loss = epoch_loss / len(final_loader)
        best_loss = min(best_loss, avg_epoch_loss)

        # Log every epoch so the run holds the full loss curve (the CV loop
        # logs per epoch too); only the console print is throttled below.
        wandb.log({
            "final_epoch": epoch + 1,
            "final_train_loss": avg_epoch_loss,
            "best_loss": best_loss
        })

        if (epoch + 1) % 5 == 0:
            print(f'Final Training Epoch {epoch+1}/{num_epochs_final}, Loss: {avg_epoch_loss:.4f}')

    # Safe MAPE calculation
    def safe_mape(y_true, y_pred):
        """MAPE in percent over non-zero targets; inf when all targets are zero."""
        nonzero = y_true != 0
        if not nonzero.any():
            return float('inf')
        relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
        return np.mean(np.abs(relative_error)) * 100

    # Score the trained model on the same data it was fitted on, so the
    # numbers below are in-sample metrics.
    print("\nEvaluating final model...")
    final_model.eval()
    all_final_predictions, all_final_targets = [], []

    with torch.no_grad():
        for eval_inputs, eval_targets in final_loader:
            batch_predictions = final_model(eval_inputs.to(device)).cpu().numpy()
            all_final_predictions.extend(batch_predictions)
            all_final_targets.extend(eval_targets.numpy())

    # Collapse the collected rows into flat 1-D arrays for the metric functions
    all_final_predictions = np.array(all_final_predictions).reshape(-1)
    all_final_targets = np.array(all_final_targets).reshape(-1)

    # Calculate final metrics
    final_mae = mean_absolute_error(all_final_targets, all_final_predictions)
    final_rmse = np.sqrt(mean_squared_error(all_final_targets, all_final_predictions))
    final_r2 = r2_score(all_final_targets, all_final_predictions)
    final_mape = safe_mape(all_final_targets, all_final_predictions)

    print(f"\nFinal Training Metrics:")
    print(f"MAE: {final_mae:.2f}")
    print(f"RMSE: {final_rmse:.2f}")
    print(f"MAPE: {final_mape:.2f}%")
    print(f"R²: {final_r2:.4f}")

    # Create complete pipeline
    class NBEATSPipeline:
        """Complete pipeline for N-BEATS inference.

        Chains the fitted preprocessing steps with the trained model so raw
        rows can be turned into predictions with a single ``predict`` call.

        Args:
            feature_merger: object exposing ``fit(X, stores_df=..., features_df=...)``
                and ``transform(X)``.
            missing_handler: object exposing ``transform(X)``.
            ts_processor: object whose ``transform(X)`` returns a dict with a
                ``'sequences'`` entry.
            model: trained ``torch.nn.Module``; it is switched to eval mode.
        """

        def __init__(self, feature_merger, missing_handler, ts_processor, model):
            self.feature_merger = feature_merger
            self.missing_handler = missing_handler
            self.ts_processor = ts_processor
            self.model = model
            self.model.eval()

        def predict(self, X_raw, stores_df=None, features_df=None, batch_size=1024):
            """Make predictions on raw test data.

            Args:
                X_raw: raw input rows, passed to the feature merger.
                stores_df: optional auxiliary data; when given (or when
                    ``features_df`` is given) the merger is re-fitted in place
                    before transforming.
                features_df: optional auxiliary data, see ``stores_df``.
                batch_size: number of sequences per forward pass; bounds
                    memory use during inference.

            Returns:
                1-D numpy array of predictions (empty when the processor
                produced no sequences).

            Raises:
                ValueError: if ``batch_size`` is smaller than 1.
            """
            if batch_size < 1:
                raise ValueError("batch_size must be >= 1")

            # If auxiliary data provided, update the merger
            if stores_df is not None or features_df is not None:
                self.feature_merger.fit(X_raw, stores_df=stores_df, features_df=features_df)

            # Process through pipeline
            merged_data = self.feature_merger.transform(X_raw)
            cleaned_data = self.missing_handler.transform(merged_data)

            # For inference, we need to create sequences from the cleaned data
            processed = self.ts_processor.transform(cleaned_data)

            if len(processed['sequences']) == 0:
                return np.array([])

            # Stack through numpy first: building a tensor straight from a
            # Python list of ndarrays goes through a slow per-element path.
            # NOTE(review): assumes every sequence already has the same shape;
            # the training cell pads feature columns before stacking --
            # confirm whether inference needs the same padding.
            sequences_np = np.ascontiguousarray(processed['sequences'], dtype=np.float32)

            # Follow the model's own device instead of a notebook-level global
            # so the inputs always land where the weights are.
            first_param = next(self.model.parameters(), None)
            model_device = first_param.device if first_param is not None else torch.device('cpu')

            self.model.eval()
            batch_outputs = []
            with torch.no_grad():
                for start in range(0, len(sequences_np), batch_size):
                    batch = torch.from_numpy(sequences_np[start:start + batch_size]).to(model_device)
                    batch_outputs.append(self.model(batch).cpu().numpy().reshape(-1))

            return np.concatenate(batch_outputs)

    # Create final pipeline from the fitted preprocessing steps and trained model
    final_pipeline = NBEATSPipeline(
        feature_merger=feature_merger,
        missing_handler=missing_handler,
        ts_processor=ts_processor_final,  # Use the final processor
        model=final_model
    )

    print("\n=== SAVING FINAL MODEL ===")

    # cloudpickle is already imported in the notebook's import cell, so no
    # in-cell import / pip-install fallback is needed here.

    # Timestamped filename so repeated runs do not overwrite each other
    pipeline_filename = f"nbeats_pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pkl"

    # Save with cloudpickle
    # NOTE(review): the model is pickled on the device it was trained on;
    # confirm the loading environment provides that device.
    with open(pipeline_filename, 'wb') as f:
        cloudpickle.dump(final_pipeline, f)

    print(f"Pipeline saved as: {pipeline_filename}")

    # Log the headline training metrics unconditionally, so the run records
    # them whether or not the artifact upload below succeeds.
    wandb.log({
        'final_train_mae': float(final_mae),
        'final_train_rmse': float(final_rmse),
        'final_train_mape': float(final_mape) if not np.isinf(final_mape) else 0.0,
        'final_train_r2': float(final_r2)
    })

    # Try to upload to wandb with error handling
    try:
        # Create model artifact; metadata mirrors the metrics and key settings
        model_artifact = wandb.Artifact(
            name="NBEATS_pipeline",
            type="model",
            description="Final N-BEATS pipeline for Walmart sales forecasting",
            metadata={
                "train_mae": float(final_mae),
                "train_rmse": float(final_rmse),
                "train_mape": float(final_mape) if not np.isinf(final_mape) else 0.0,
                "train_r2": float(final_r2),
                "sequences_count": len(sequences_final_np),
                "training_samples": len(all_final_targets),
                "model_type": "NBEATS",
                "lookback_window": 52,
                "forecast_horizon": 1,
                "num_stacks": model_config["num_stacks"],
                "num_blocks_per_stack": model_config["num_blocks_per_stack"]
            }
        )

        # Add model file to artifact
        model_artifact.add_file(pipeline_filename)

        # Log artifact
        wandb.log_artifact(model_artifact)
        print("✓ Model artifact logged to wandb successfully!")

    except Exception as e:
        # Best effort: the pipeline file is already on disk, so report the
        # failure and record where the file lives instead of aborting.
        print(f"⚠️ Error uploading to wandb: {e}")
        print("Model saved locally - you can manually upload later")

        wandb.log({'model_saved_locally': pipeline_filename})

    # Console recap of the final model, framed by a 60-character rule
    rule = "="*60
    print(f"\n" + rule)
    print("FINAL MODEL SUMMARY")
    print(rule)
    for summary_line in (
        f"Model Type: N-BEATS",
        f"Training Sequences: {len(all_final_targets):,}",
        f"Lookback Window: 52 weeks",
        f"Forecast Horizon: 1 week",
        f"Training MAE: {final_mae:.2f}",
        f"Training RMSE: {final_rmse:.2f}",
        f"Training MAPE: {final_mape:.2f}%",
        f"Training R²: {final_r2:.4f}",
        f"Pipeline saved as: {pipeline_filename}",
    ):
        print(summary_line)
    print(rule)

# Close the final-training run
wandb.finish()

# Only claim success when the training branch actually ran; with no sequences
# nothing was trained or saved.
if len(sequences_final) > 0:
    print("\n✓ Final training completed and model saved!")
else:
    print("\n⚠️ Final training skipped: no sequences were generated.")


=== FINAL TRAINING ===
Re-processing data for final training...
Found 3331 store-dept combinations
Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
Generated 261083 valid sequences from 3331 store-dept combinations
Re-generated 261083 sequences for final training
Final training data shape: (261083, 52, 3)
Final training targets shape: (261083, 1)
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock theta_f weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock theta_f weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock initialized - input_size: 52, layer_size: 256, theta_size: 52
NBeatsBlock theta_b weight shape (in __init__): torch.Size([52, 256])
NBeatsBlock theta_f weight shape (in __init__): torch.Size([52, 256])
NB