=============================================================================

TRAIN BASELINE MODEL NOTEBOOK

    Purpose: Load raw data, preprocess, split, define GRU model, train baseline model, save processed data and baseline model.

=============================================================================

# Import Configuration and Libraries

In [None]:
import os
import warnings
import logging

# Suppress warnings
# Nothing in the visible cells imports gymnasium directly.
# NOTE(review): confirm these gymnasium-specific settings are still needed.
os.environ["GYM_DISABLE_WARNINGS"] = "true"
warnings.filterwarnings("ignore", module="gymnasium")
# No module restriction on this filter: it hides every UserWarning raised for
# the rest of the session, not only those coming from gymnasium.
warnings.filterwarnings("ignore", category=UserWarning)
# Only ERROR and above get through on the "gymnasium" logger.
logging.getLogger("gymnasium").setLevel(logging.ERROR)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import copy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR
from builtins import print as builtin_print # To avoid conflict with tqdm print

print("Libraries imported.")

# Configuration Class

In [None]:
class Config:
    """Single source of settings for the baseline run.

    Everything is a plain class attribute, so values are read as
    ``Config.NAME`` without creating an instance.
    """

    # ---- File locations ----
    # Raw CSV input; change this if the dataset is mounted somewhere else.
    DATA_PATH = '/kaggle/input/seattle-weather/seattle-weather.csv'
    # Destination for the train/val/test tensors.
    PROCESSED_DATA_SAVE_PATH = '/kaggle/working/processed_data.pt'
    # Destination for the trained baseline weights.
    BASELINE_MODEL_SAVE_PATH = '/kaggle/working/baseline_model.pth'

    # ---- Windowing and splitting ----
    # Number of consecutive rows in one input window.
    SEQUENCE_LENGTH = 30
    # Fraction of all windows held out for testing.
    TEST_SIZE = 0.2
    # Fraction of the non-test remainder held out for validation.
    VAL_SIZE_FROM_TEMP = 0.1
    # Seed passed to both split calls.
    RANDOM_STATE = 42

    # ---- Network shape ----
    # One input per feature column: precipitation, temp_max, temp_min, wind.
    INPUT_DIM = 4
    HIDDEN_DIM = 256
    N_LAYERS = 2
    OUTPUT_DIM = 1
    DROPOUT = 0.2

    # ---- Optimisation ----
    BATCH_SIZE = 64
    EPOCHS = 500
    LEARNING_RATE = 1e-4
    WEIGHT_DECAY = 1e-4
    # T_max and eta_min handed to the cosine annealing scheduler.
    SCHEDULER_T_MAX = 50
    SCHEDULER_ETA_MIN = 1e-6

    # ---- Hardware ----
    # Use the GPU when one is visible to torch, otherwise fall back to the CPU.
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Echo the settings that decide where the run executes and what it writes.
print(f"Running on device: {Config.DEVICE}")
print(f"Processed data will be saved to: {Config.PROCESSED_DATA_SAVE_PATH}")
print(f"Baseline model will be saved to: {Config.BASELINE_MODEL_SAVE_PATH}")

# Data Loading and Preprocessing

In [None]:
def create_sequences(data, target, seq_length):
    """Cut a series into overlapping windows, each paired with the next-step label.

    Window ``k`` is ``data[k:k + seq_length]`` and its label is
    ``target[k + seq_length]``, i.e. the target of the step right after the
    window ends. Consecutive windows start one step apart.

    Args:
        data: Sliceable sequence of per-step feature rows.
        target: Indexable sequence of per-step labels.
        seq_length: Number of consecutive steps in each window.

    Returns:
        A ``(windows, labels)`` pair of NumPy arrays holding one entry per
        window start. Both arrays are empty when ``data`` has no more than
        ``seq_length`` rows.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    labels = [target[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(labels)

# Load data
# A missing file is fatal: the message is printed for the notebook reader and
# the original exception is re-raised so execution stops here.
try:
    df = pd.read_csv(Config.DATA_PATH)
except FileNotFoundError:
    print(f"Error: Dataset file not found at {Config.DATA_PATH}. Please ensure it's added correctly.")
    raise
else:
    print(f"Dataset loaded successfully from {Config.DATA_PATH}")

# Feature Engineering & Cleaning
# Binary target: 1 when the day's label is 'rain' or 'drizzle', 0 otherwise.
df['weather_numeric'] = df['weather'].isin(['rain', 'drizzle']).astype('int64')
# 'date' and the raw 'weather' label are removed; what is left is the input
# columns plus the binary target.
df = df.drop(columns=['date', 'weather'])

# Scaling
# Every column except the target is min-max scaled.
# NOTE(review): the scaler is fitted on all rows before the train/val/test
# split below, so held-out rows influence the scaling ranges. Confirm this is
# acceptable for the baseline.
scaler = MinMaxScaler()
features_to_scale = df.columns.drop('weather_numeric')
scaled_features_np = scaler.fit_transform(df[features_to_scale])
target_np = df['weather_numeric'].to_numpy()
print("Features scaled.")

# Sequence Creation
# Each sample is SEQUENCE_LENGTH consecutive scaled rows, labelled with the
# target of the row that immediately follows the window.
X_np, y_np = create_sequences(scaled_features_np, target_np, Config.SEQUENCE_LENGTH)
print(f"Sequences created with length {Config.SEQUENCE_LENGTH}.")

# Data Splitting (Stratified)
# Two stages: hold out TEST_SIZE of all windows for testing, then carve
# VAL_SIZE_FROM_TEMP of the remainder out for validation. Both stages stratify
# on the label and use the same RANDOM_STATE.
# NOTE(review): consecutive windows differ by a single row and
# train_test_split shuffles by default, so near-identical windows can end up
# on both sides of each split. Consider a chronological split if the held-out
# scores are meant to estimate forecasting performance.
X_temp, X_test_np, y_temp, y_test_np = train_test_split(
    X_np, y_np, test_size=Config.TEST_SIZE, random_state=Config.RANDOM_STATE, stratify=y_np
)
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X_temp, y_temp, test_size=Config.VAL_SIZE_FROM_TEMP, random_state=Config.RANDOM_STATE, stratify=y_temp
)
print(f"Data split: Train={len(X_train_np)}, Validation={len(X_val_np)}, Test={len(X_test_np)}")

# Convert to PyTorch Tensors
# Inputs are copied to float32 tensors as they are.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(split_np, dtype=torch.float32)
    for split_np in (X_train_np, X_val_np, X_test_np)
)
# Labels get a trailing axis of size 1 so each label is its own row, matching
# the single-output column the model produces per sample.
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(split_np, dtype=torch.float32).unsqueeze(1)
    for split_np in (y_train_np, y_val_np, y_test_np)
)
print("Data converted to PyTorch tensors.")

# Save Processed Data
# All six split tensors go into one dict so they can be written as a single file.
# NOTE(review): the fitted `scaler` is not part of this bundle; anything that
# must scale new raw data the same way has to obtain it separately.
processed_data = {
    'X_train': X_train_tensor, 'y_train': y_train_tensor,
    'X_val': X_val_tensor, 'y_val': y_val_tensor,
    'X_test': X_test_tensor, 'y_test': y_test_tensor,
}
# A failed save is reported but not fatal: the tensors are still in memory, so
# the cells that follow can keep running.
try:
    torch.save(processed_data, Config.PROCESSED_DATA_SAVE_PATH)
    print(f"Processed data saved to {Config.PROCESSED_DATA_SAVE_PATH}")
except Exception as e:
    print(f"Error saving processed data: {e}")

# GRU Model Definition

In [None]:
class WeatherGRU(nn.Module):
    """Stacked GRU with a linear head squashed through a sigmoid.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Width of the GRU hidden state.
        n_layers: Number of stacked GRU layers.
        output_dim: Number of values produced per sample.
        dropout: Dropout rate between GRU layers; ignored (set to 0) when
            there is only one layer.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, output_dim, dropout):
        super().__init__()
        # GRU dropout acts between stacked layers only, so a single-layer
        # network gets 0 instead of the requested rate.
        inter_layer_dropout = dropout if n_layers > 1 else 0
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch-first batch of sequences to one sigmoid output row per sample."""
        # The final hidden state returned by the GRU is not needed.
        sequence_output, _ = self.gru(x)
        # Keep only the last time step of the top layer's output.
        last_step = sequence_output[:, -1, :]
        projected = self.fc(last_step)
        return self.sigmoid(projected)

print("WeatherGRU model class defined.")

# Baseline Model Training Function

In [None]:
def train_model(model, train_loader, val_loader, epochs, lr, weight_decay, scheduler_t_max, scheduler_eta_min, device):
    """Train ``model`` with BCE loss and return it holding its best-validation weights.

    Every epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. The learning rate follows a cosine
    annealing schedule that is stepped once per epoch. Whenever the validation
    loss improves, a deep copy of the weights is kept; that copy is loaded back
    into ``model`` before returning.

    Reported losses are per-sample means: each batch loss is weighted by its
    batch size, so a smaller final batch does not skew the epoch average or
    the best-model selection.

    Args:
        model: Module whose outputs are probabilities in [0, 1], as required
            by ``nn.BCELoss``. It is moved to ``device`` in place.
        train_loader: Iterable of ``(inputs, labels)`` tensor batches used for
            optimisation.
        val_loader: Iterable of ``(inputs, labels)`` tensor batches used for
            model selection only.
        epochs: Number of passes over ``train_loader``.
        lr: Initial AdamW learning rate.
        weight_decay: AdamW weight decay.
        scheduler_t_max: ``T_max`` passed to ``CosineAnnealingLR``.
            NOTE(review): if ``epochs`` exceeds this value the rate climbs
            back up after reaching ``eta_min`` — confirm that is intended.
        scheduler_eta_min: ``eta_min`` passed to ``CosineAnnealingLR``.
        device: Device the model and every batch are moved to.

    Returns:
        The same ``model`` object. It carries the weights from the epoch with
        the lowest validation loss, or the final weights if no epoch produced
        a finite improvement (e.g. the validation loss was always NaN).

    Raises:
        ZeroDivisionError: If either loader yields no samples.
    """
    criterion = nn.BCELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=scheduler_t_max, eta_min=scheduler_eta_min)

    model.to(device)
    best_val_loss = float('inf')
    best_model_state = None

    print("\n--- Starting Baseline Model Training ---")
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss_sum = 0.0
        train_sample_count = 0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]", leave=False)
        for inputs, labels in train_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # BCELoss returns the batch mean; scale by the batch size so the
            # epoch figure is a true per-sample mean.
            batch_loss = loss.item()
            batch_count = labels.size(0)
            train_loss_sum += batch_loss * batch_count
            train_sample_count += batch_count
            train_bar.set_postfix(loss=batch_loss)
        avg_train_loss = train_loss_sum / train_sample_count

        # Validation phase
        model.eval()
        val_loss_sum = 0.0
        val_sample_count = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                batch_count = labels.size(0)
                val_loss_sum += loss.item() * batch_count
                val_sample_count += batch_count
        avg_val_loss = val_loss_sum / val_sample_count

        # Read the rate before stepping so the log shows the value this epoch used.
        current_lr = optimizer.param_groups[0]['lr']
        builtin_print(f"Epoch {epoch+1}/{epochs}: Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}, LR={current_lr:.6f}")

        scheduler.step()

        # Save best model based on validation loss
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # Deep copy: state_dict() returns references to the live
            # parameters, which later optimiser steps would overwrite.
            best_model_state = copy.deepcopy(model.state_dict())
            builtin_print(f"  New best validation loss: {best_val_loss:.4f}. Saving model state.")

    # Load the best model state found during training. Compare against None
    # explicitly: a captured state dict must be restored even if it is empty.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\n--- Best model state loaded (Val Loss: {best_val_loss:.4f}) ---")
    else:
        print("\n--- Warning: No best model state saved (check validation loss) ---")

    print("--- Baseline Model Training Finished ---")
    return model

# Execute Baseline Training

In [None]:
# Create DataLoaders for training
# Each dataset pairs an input window with its label row.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
print("Train and Validation DataLoaders created.")

# Initialize model
# All architecture hyperparameters come from Config.
baseline_model = WeatherGRU(
    input_dim=Config.INPUT_DIM,
    hidden_dim=Config.HIDDEN_DIM,
    n_layers=Config.N_LAYERS,
    output_dim=Config.OUTPUT_DIM,
    dropout=Config.DROPOUT
)
print("Baseline WeatherGRU model initialized.")

# Train the model
# train_model moves the model to Config.DEVICE and returns it with the weights
# of the epoch that had the lowest validation loss.
baseline_model = train_model(
    model=baseline_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=Config.EPOCHS,
    lr=Config.LEARNING_RATE,
    weight_decay=Config.WEIGHT_DECAY,
    scheduler_t_max=Config.SCHEDULER_T_MAX,
    scheduler_eta_min=Config.SCHEDULER_ETA_MIN,
    device=Config.DEVICE
)

# Save the trained baseline model
# Only the state dict (weights) is written, not the module object, so loading
# it requires constructing WeatherGRU with the same hyperparameters first.
# NOTE(review): the tensors are saved on whatever device training used; a
# CPU-only consumer will presumably need map_location when loading — confirm.
# A failed save is reported but does not raise.
try:
    torch.save(baseline_model.state_dict(), Config.BASELINE_MODEL_SAVE_PATH)
    print(f"Baseline model state dictionary saved to {Config.BASELINE_MODEL_SAVE_PATH}")
except Exception as e:
    print(f"Error saving baseline model: {e}")