# =============================================================================
# BASELINE MODEL TRAINING NOTEBOOK
# =============================================================================
## Purpose:
    - Establish a deterministic environment for reproducibility.
    - Load and preprocess the time-series weather dataset (Seattle Weather).
    - Define the baseline Gated Recurrent Unit (GRU) architecture.
    - Train the model using a standard supervised learning approach.
    - Save the optimized model weights and processed tensors for the RL stage.
# =============================================================================

# === Clone Repository & Install Dependencies ===

In [1]:
# Use if run on Kaggle
!rm -rf Sustainable_AI_Agent_Project
!git clone https://github.com/trongjhuongwr/Sustainable_AI_Agent_Project.git
%cd Sustainable_AI_Agent_Project

Cloning into 'Sustainable_AI_Agent_Project'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 68 (delta 26), reused 54 (delta 15), pack-reused 0 (from 0)[K
Receiving objects: 100% (68/68), 1.16 MiB | 5.17 MiB/s, done.
Resolving deltas: 100% (26/26), done.
/kaggle/working/Sustainable_AI_Agent_Project


In [2]:
!pip install -q --extra-index-url https://download.pytorch.org/whl/cu121 -r /kaggle/working/Sustainable_AI_Agent_Project/requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.5/780.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m118.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

# 1. Import Libraries and Configuration

In [3]:
import os
import warnings
import logging
import copy
import random

# Suppress warnings for cleaner output
os.environ["GYM_DISABLE_WARNINGS"] = "true"
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", module="gymnasium")
warnings.filterwarnings("ignore", category=UserWarning)
logging.getLogger("gymnasium").setLevel(logging.ERROR)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR
from builtins import print as builtin_print

print("Libraries imported successfully.")

Libraries imported successfully.


# 2. Configuration Class

In [4]:
# Defines hyperparameters, file paths, and other parameters used throughout the baseline training process.
class Config:
    # --- Data Paths ---
    DATA_PATH = '/kaggle/input/weather-prediction/seattle-weather.csv' # Path to the raw dataset CSV file
    PROCESSED_DATA_SAVE_PATH = '/kaggle/working/processed_data.pt'     # Output path for saving processed data tensors
    BASELINE_MODEL_SAVE_PATH = '/kaggle/working/baseline_model.pth'    # Output path for saving the trained baseline model state dictionary

    # --- Data Preprocessing Parameters ---
    SEQUENCE_LENGTH = 30      # Number of past days used to predict the next day
    TEST_SIZE = 0.2           # Proportion of data reserved for the final test set
    VAL_SIZE = 0.1            # Proportion of the remaining data (after test split) used for validation
    SEED = 42                 # Random seed for reproducibility

    # --- Model Architecture Parameters ---
    INPUT_DIM = 4      # Number of input features: precipitation, temp_max, temp_min, wind
    HIDDEN_DIM = 256   # Dimensionality of the GRU hidden state
    N_LAYERS = 2       # Number of stacked GRU layers
    OUTPUT_DIM = 1     # Output dimension (binary classification: rain probability)
    DROPOUT = 0.2      # Dropout rate applied between GRU layers

    # --- Training Hyperparameters ---
    BATCH_SIZE = 64
    EPOCHS = 100              # Number of training epochs
    LEARNING_RATE = 1e-4      # AdamW initial learning rate
    WEIGHT_DECAY = 1e-4       # Weight decay for AdamW optimizer
    SCHEDULER_T_MAX = 50      # T_max for CosineAnnealingLR scheduler (cycle length)
    SCHEDULER_ETA_MIN = 1e-6  # Minimum learning rate for scheduler

    # --- Computation Device ---
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True 
    torch.backends.cudnn.benchmark = False

seed_everything(Config.SEED)

print(f"Configuration loaded. Using device: {Config.DEVICE}")
print(f"Seed set to: {Config.SEED}")
print(f"Processed data will be saved to: {Config.PROCESSED_DATA_SAVE_PATH}")
print(f"Baseline model will be saved to: {Config.BASELINE_MODEL_SAVE_PATH}")

Configuration loaded. Using device: cuda
Seed set to: 42
Processed data will be saved to: /kaggle/working/processed_data.pt
Baseline model will be saved to: /kaggle/working/baseline_model.pth


# 3. Data Loading and Preprocessing

In [5]:
def load_and_preprocess_data(config):
    """
    Loads raw data, performs feature scaling, generates sequences, 
    and creates PyTorch tensors for training.
    
    Returns:
        dict: A dictionary containing train/val/test loaders and tensors.
    """
    # 1. Load Data
    try:
        df = pd.read_csv(config.DATA_PATH)
        print(f"Data loaded successfully. Shape: {df.shape}")
    except FileNotFoundError:
        raise FileNotFoundError(f"Dataset not found at {config.DATA_PATH}. Please verify the path.")

    # 2. Feature Engineering
    # Convert categorical 'weather' to binary target (1: Rain/Drizzle, 0: Others)
    df['target'] = df['weather'].apply(lambda x: 1 if x in ['rain', 'drizzle'] else 0)
    features = ['precipitation', 'temp_max', 'temp_min', 'wind']
    
    # 3. Normalization (Min-Max Scaling) to [0, 1]
    # Essential for neural network convergence
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df[features])
    targets = df['target'].values
    
    # 4. Sequence Generation (Sliding Window)
    X, y = [], []
    for i in range(len(scaled_features) - config.SEQUENCE_LENGTH):
        X.append(scaled_features[i : i + config.SEQUENCE_LENGTH])
        y.append(targets[i + config.SEQUENCE_LENGTH])
    
    X = np.array(X)
    y = np.array(y)
    
    # 5. Stratified Data Splitting
    # Split Test Set
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y, test_size=config.TEST_SIZE, random_state=config.SEED, stratify=y
    )
    # Split Validation Set
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=config.VAL_SIZE, random_state=config.SEED, stratify=y_temp
    )
    
    # 6. Tensor Conversion
    # Move data to GPU memory if available for faster training
    tensors = {
        'X_train': torch.FloatTensor(X_train),
        'y_train': torch.FloatTensor(y_train).unsqueeze(1),
        'X_val': torch.FloatTensor(X_val),
        'y_val': torch.FloatTensor(y_val).unsqueeze(1),
        'X_test': torch.FloatTensor(X_test),
        'y_test': torch.FloatTensor(y_test).unsqueeze(1)
    }
    
    print(f"Training Samples: {len(X_train)} | Validation: {len(X_val)} | Test: {len(X_test)}")
    
    # Save processed tensors for the RL Agent (Stage 2)
    torch.save(tensors, config.PROCESSED_DATA_SAVE_PATH)
    print(f"Processed data saved to {config.PROCESSED_DATA_SAVE_PATH}")
    
    return tensors

# Execute preprocessing
data_tensors = load_and_preprocess_data(Config)

# Create DataLoaders
train_loader = DataLoader(TensorDataset(data_tensors['X_train'], data_tensors['y_train']), 
                          batch_size=Config.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(TensorDataset(data_tensors['X_val'], data_tensors['y_val']), 
                        batch_size=Config.BATCH_SIZE, shuffle=False)

Data loaded successfully. Shape: (1461, 6)
Training Samples: 1029 | Validation: 115 | Test: 287
Processed data saved to /kaggle/working/processed_data.pt


# 4. Model Architecture (Baseline GRU)

In [6]:
class WeatherGRU(nn.Module):
    """
    Standard Gated Recurrent Unit (GRU) architecture for time-series binary classification.
    Constructed to serve as a baseline for subsequent compression experiments.
    """
    def __init__(self, config):
        super(WeatherGRU, self).__init__()
        self.gru = nn.GRU(
            input_size=config.INPUT_DIM,
            hidden_size=config.HIDDEN_DIM,
            num_layers=config.N_LAYERS,
            batch_first=True,
            dropout=config.DROPOUT if config.N_LAYERS > 1 else 0
        )
        self.fc = nn.Linear(config.HIDDEN_DIM, config.OUTPUT_DIM)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        # x shape: (batch_size, seq_len, input_dim)
        out, _ = self.gru(x)
        
        # Utilize the hidden state from the last time step
        # out[:, -1, :] shape: (batch_size, hidden_dim)
        out = self.fc(out[:, -1, :])
        return self.sigmoid(out)

# 5. Training Routine

In [7]:
def train_model(model, train_loader, val_loader, config):
    """
    Executes the training loop with validation monitoring and model checkpointing.
    """
    criterion = nn.BCELoss() # Binary Cross Entropy
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY)
    scheduler = CosineAnnealingLR(optimizer, T_max=config.EPOCHS)
    
    best_val_loss = float('inf')
    model.to(config.DEVICE)
    
    print("\n--- Initiating Baseline Training ---")
    progress_bar = tqdm(range(config.EPOCHS), desc="Training Epochs")
    
    for epoch in progress_bar:
        # Training Phase
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(config.DEVICE), y_batch.to(config.DEVICE)
            
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            
        avg_train_loss = train_loss / len(train_loader)
        
        # Validation Phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(config.DEVICE), y_batch.to(config.DEVICE)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
        
        avg_val_loss = val_loss / len(val_loader)
        scheduler.step()
        
        # Checkpointing
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), config.BASELINE_MODEL_SAVE_PATH)
            
        progress_bar.set_postfix({'Train Loss': f'{avg_train_loss:.4f}', 'Val Loss': f'{avg_val_loss:.4f}'})

    print(f"\nTraining completed. Best Validation Loss: {best_val_loss:.4f}")
    print(f"Model checkpoint saved to {config.BASELINE_MODEL_SAVE_PATH}")

# Initialize and Train
baseline_model = WeatherGRU(Config)
train_model(baseline_model, train_loader, val_loader, Config)


--- Initiating Baseline Training ---


Training Epochs:   0%|          | 0/100 [00:00<?, ?it/s]


Training completed. Best Validation Loss: 0.5999
Model checkpoint saved to /kaggle/working/baseline_model.pth
