# =============================================================================
# REINFORCEMENT LEARNING AGENT TRAINING NOTEBOOK
# =============================================================================
## Purpose:
    - Define the custom RL Environment (`SustainableAIAgentEnv`) with fail-safe reward mechanisms.
    - Implement a Proximal Policy Optimization (PPO) agent with entropy regularization.
    - Conduct a comparative benchmark against a Random Search strategy.
    - Save the best-performing policy and metrics for final evaluation.
# =============================================================================

# === Clone Repository & Install Dependencies ===

In [1]:
# Use if run on Kaggle
!rm -rf Sustainable_AI_Agent_Project
!git clone https://github.com/trongjhuongwr/Sustainable_AI_Agent_Project.git
%cd Sustainable_AI_Agent_Project

Cloning into 'Sustainable_AI_Agent_Project'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 68 (delta 26), reused 54 (delta 15), pack-reused 0 (from 0)[K
Receiving objects: 100% (68/68), 1.16 MiB | 8.67 MiB/s, done.
Resolving deltas: 100% (26/26), done.
/kaggle/working/Sustainable_AI_Agent_Project


In [2]:
!pip install -q --extra-index-url https://download.pytorch.org/whl/cu121 -r /kaggle/working/Sustainable_AI_Agent_Project/requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.5/780.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m98.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m118.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [3]:
# Uninstall torchvision to prevent import conflicts with ptflops/pytorch
!pip uninstall -y torchvision
print("Torchvision uninstalled.")

Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Torchvision uninstalled.


# 1. Import Libraries and Configuration

In [4]:
import os
import warnings
import logging
import json
import copy
import random

# Suppress specific warnings for cleaner output
os.environ["GYM_DISABLE_WARNINGS"] = "true"
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", module="gymnasium")
warnings.filterwarnings("ignore", category=UserWarning)
logging.getLogger("gymnasium").setLevel(logging.ERROR)
logging.getLogger("stable_baselines3").setLevel(logging.ERROR)

import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from sklearn.metrics import accuracy_score
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from codecarbon import EmissionsTracker
from ptflops import get_model_complexity_info
import torch_pruning as tp
from tqdm.notebook import tqdm
from builtins import print as builtin_print

print("Libraries imported successfully.")

2025-12-23 17:33:00.382452: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766511180.559107      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766511180.609266      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Libraries imported successfully.


# 2. Configuration Class

In [5]:
class Config:
    # --- Input/Output Paths ---
    PROCESSED_DATA_PATH = '/kaggle/input/baseline-model-saa/processed_data.pt'
    BASELINE_MODEL_PATH = '/kaggle/input/baseline-model-saa/baseline_model.pth'
    AGENT_SAVE_PATH = "/kaggle/working/sustainable_ai_agent_expanded.zip" # New Name
    BEST_ACTION_SAVE_PATH = "/kaggle/working/best_action_expanded.json"   # New Name
    
    # --- Data & Model Params ---
    SEQUENCE_LENGTH = 30
    INPUT_DIM = 4
    HIDDEN_DIM = 256
    N_LAYERS = 2
    OUTPUT_DIM = 1
    DROPOUT = 0.2
    
    # --- RL Hyperparameters ---
    SEED = 42
    # Increased timesteps because search space is 8x larger (16 -> 128)
    TOTAL_TIMESTEPS = 50000 
    
    # Reward Shaping
    ACCURACY_PENALTY_THRESHOLD = 0.98
    ACC_REWARD_SCALE = 20.0
    FLOPS_REWARD_SCALE = 2.0
    PARAMS_REWARD_SCALE = 1.0
    
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === 3. Reproducibility ===
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(Config.SEED)

print(f"Configuration loaded. Using device: {Config.DEVICE}")
print(f"Seed set to: {Config.SEED}")
print(f"Loading processed data from: {Config.PROCESSED_DATA_PATH}")
print(f"Loading baseline model from: {Config.BASELINE_MODEL_PATH}")
print(f"Agent will be saved to: {Config.AGENT_SAVE_PATH}")

Configuration loaded. Using device: cuda
Seed set to: 42
Loading processed data from: /kaggle/input/baseline-model-saa/processed_data.pt
Loading baseline model from: /kaggle/input/baseline-model-saa/baseline_model.pth
Agent will be saved to: /kaggle/working/sustainable_ai_agent_expanded.zip


# 3. Utility Functions and Model Definition

In [6]:
class WeatherGRU(nn.Module):
    def __init__(self, config):
        super(WeatherGRU, self).__init__()
        self.gru = nn.GRU(
            input_size=config.INPUT_DIM,
            hidden_size=config.HIDDEN_DIM,
            num_layers=config.N_LAYERS,
            batch_first=True,
            dropout=config.DROPOUT if config.N_LAYERS > 1 else 0
        )
        self.fc = nn.Linear(config.HIDDEN_DIM, config.OUTPUT_DIM)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])
        return self.sigmoid(out)

# 4. Optimization Primitives (Pruning & Quantization)

In [7]:
def count_parameters(model):
    """Calculates the effective number of non-zero parameters in the model."""
    total_params = 0
    for p in model.parameters():
        if p.requires_grad:
            total_params += torch.count_nonzero(p).item()
    return total_params

def apply_layerwise_pruning(model, l0_rate, l1_rate, linear_rate):
    """
    Implements Deep Granular Pruning Strategy:
    - l0_rate: Pruning rate for GRU Layer 0 (Feature Extraction Layer - Sensitive).
    - l1_rate: Pruning rate for GRU Layer 1 (Abstract Representation Layer - Redundant).
    - linear_rate: Pruning rate for the Linear Readout Layer.
    """
    model_copy = copy.deepcopy(model)
    
    # 1. Layer-wise GRU Pruning
    for module in model_copy.modules():
        if isinstance(module, nn.GRU):
            for name_param, param in list(module.named_parameters()):
                if 'weight' in name_param:
                    # Identify layer index based on parameter name convention
                    if 'l0' in name_param:   # Layer 0
                        rate = l0_rate
                    elif 'l1' in name_param: # Layer 1
                        rate = l1_rate
                    else:
                        rate = 0.0
                    
                    if rate > 0:
                        prune.l1_unstructured(module, name=name_param, amount=rate)
                        prune.remove(module, name=name_param)

    # 2. Linear Layer Pruning
    if linear_rate > 0:
        for module in model_copy.modules():
            if isinstance(module, nn.Linear):
                prune.l1_unstructured(module, name='weight', amount=linear_rate)
                prune.remove(module, name='weight')
                
    return model_copy

def apply_quantization(model):
    """Applies dynamic quantization (Int8) to reduce model size and inference latency."""
    model_copy = copy.deepcopy(model)
    model_copy.to('cpu')
    model_copy.eval()
    quantized_model = torch.quantization.quantize_dynamic(
        model_copy, {nn.Linear, nn.GRU}, dtype=torch.qint8
    )
    return quantized_model

# 5. Custom RL Environment with Fail-Safe Mechanism

In [8]:
# [REPLACEMENT FOR CELL 8]
class SustainableAIAgentEnvExpanded(gym.Env):
    """
    Continuous Deep Control Environment for Eco-friendly AI Optimization.
    Action Space (4D Continuous Box): [GRU_L0, GRU_L1, Linear, Quantization_Prob]
    """
    def __init__(self, baseline_model, val_loader, config):
        super(SustainableAIAgentEnvExpanded, self).__init__()
        self.baseline_model = baseline_model
        self.val_loader = val_loader
        self.config = config
        
        # Initialize Baseline Benchmarks
        self.baseline_metrics = self._evaluate_performance(self.baseline_model)
        if self.baseline_metrics['flops'] < 1000: self.baseline_metrics['flops'] = 1e6
        print(f"Baseline Benchmark Metrics: {self.baseline_metrics}")
        
        # --- Continuous Action Space ---
        # 0: GRU Layer 0 Pruning Rate (0.0 - 1.0)
        # 1: GRU Layer 1 Pruning Rate (0.0 - 1.0)
        # 2: Linear Pruning Rate (0.0 - 1.0)
        # 3: Quantization Probability (Threshold > 0.5 triggers quantization)
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(4,), dtype=np.float32)
        
        # Observation Space: [Accuracy, Accuracy_Delta, Param_Reduction, FLOPs_Reduction]
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)

    def _evaluate_performance(self, model):
        # (Logic giữ nguyên nhưng đảm bảo tính đúng đắn cho Quantization)
        model.eval()
        is_quantized = any("quantized" in str(type(m)).lower() for m in model.modules())
        device = torch.device("cpu") if is_quantized else self.config.DEVICE
        model.to(device)
        
        y_true, y_pred = [], []
        with torch.no_grad():
            for X, y in self.val_loader:
                X = X.to(device)
                preds = (model(X) > 0.5).float()
                y_true.extend(y.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())
        accuracy = accuracy_score(y_true, y_pred)
        
        params = count_parameters(model)
        baseline_p = self.baseline_metrics['params'] if hasattr(self, 'baseline_metrics') else params
        if baseline_p == 0: baseline_p = 1
        param_ratio = params / baseline_p
        
        baseline_f = self.baseline_metrics['flops'] if hasattr(self, 'baseline_metrics') else 1e6
        quant_factor = 0.25 if is_quantized else 1.0
        flops = baseline_f * param_ratio * quant_factor
        
        return {"accuracy": accuracy, "params": params, "flops": flops}

    def step(self, action):
        # 1. Decode Continuous Actions
        # Clip to safe ranges (e.g., max 90% pruning to prevent total information loss)
        l0_rate = np.clip(action[0], 0.0, 0.90) 
        l1_rate = np.clip(action[1], 0.0, 0.95) # Layer 1 is more redundant, allow higher pruning
        linear_rate = np.clip(action[2], 0.0, 0.95)
        
        # Quantization Decision
        use_quantization = action[3] > 0.5
        
        # 2. Apply Deep Granular Optimizations
        current_model = copy.deepcopy(self.baseline_model)
        current_model = apply_layerwise_pruning(current_model, l0_rate, l1_rate, linear_rate)
        
        if use_quantization:
            current_model = apply_quantization(current_model)
            
        # 3. Evaluate
        metrics = self._evaluate_performance(current_model)
        
        # 4. Calculate Reward (Multi-objective)
        acc_drop = metrics['accuracy'] - self.baseline_metrics['accuracy']
        flops_reduction = 1.0 - (metrics['flops'] / self.baseline_metrics['flops'])
        params_reduction = 1.0 - (metrics['params'] / self.baseline_metrics['params'])
        
        # Penalty for significant accuracy degradation (>5% drop)
        if metrics['accuracy'] < (self.baseline_metrics['accuracy'] * 0.95):
            acc_reward = -10.0
        else:
            acc_reward = acc_drop * self.config.ACC_REWARD_SCALE
            
        eff_reward = (flops_reduction * self.config.FLOPS_REWARD_SCALE) + \
                     (params_reduction * self.config.PARAMS_REWARD_SCALE)
        
        total_reward = acc_reward + eff_reward
        
        obs = np.array([metrics['accuracy'], acc_drop, params_reduction, flops_reduction], dtype=np.float32)
        
        info = {
            "gru_l0": l0_rate,
            "gru_l1": l1_rate,
            "linear": linear_rate,
            "quant": use_quantization,
            "accuracy": metrics['accuracy'],
            "reward": total_reward
        }
        
        return obs, total_reward, True, False, info

    def reset(self, seed=None):
        super().reset(seed=seed)
        obs = np.array([self.baseline_metrics['accuracy'], 0.0, 0.0, 0.0], dtype=np.float32)
        return obs, {}

# 6. Main Execution Pipeline

In [9]:
# 1. Load Data & Baseline
try:
    processed_data = torch.load(Config.PROCESSED_DATA_PATH)
    val_dataset = TensorDataset(processed_data['X_val'], processed_data['y_val'])
    val_loader = DataLoader(val_dataset, batch_size=1024, shuffle=False)

    baseline_model = WeatherGRU(Config)
    baseline_model.load_state_dict(torch.load(Config.BASELINE_MODEL_PATH))
    print("Baseline Model loaded successfully.")
except FileNotFoundError as e:
    print(f"Critical Error: Artifacts not found. {e}")

# 2. Initialize Continuous Environment
env = SustainableAIAgentEnvExpanded(baseline_model, val_loader, Config)

# 3. Train PPO Agent (Continuous Policy)
print("\n--- Initiating Continuous PPO Training (Deep Layer-wise Optimization) ---")
checkpoint_callback = CheckpointCallback(save_freq=5000, save_path='/kaggle/working/checkpoints_expanded/', name_prefix='ppo_continuous')

# MlpPolicy handles continuous spaces automatically
agent = PPO("MlpPolicy", env, verbose=1, seed=Config.SEED, ent_coef=0.01, device=Config.DEVICE)
agent.learn(total_timesteps=Config.TOTAL_TIMESTEPS, callback=checkpoint_callback)
agent.save(Config.AGENT_SAVE_PATH)
print("Training Procedure Concluded.")

# 4. Extract & Validate Best Strategy
obs, _ = env.reset()
action, _ = agent.predict(obs, deterministic=True)
_, _, _, _, best_info = env.step(action)

print("\n--- Optimal Strategy Discovered (Global Optimum) ---")
print(f"GRU Layer 0 Pruning Rate (Feature Extraction): {best_info['gru_l0']*100:.2f}%")
print(f"GRU Layer 1 Pruning Rate (Abstract Representation): {best_info['gru_l1']*100:.2f}%")
print(f"Linear Readout Pruning Rate: {best_info['linear']*100:.2f}%")
print(f"Dynamic Quantization Applied: {best_info['quant']}")
print(f"Resulting Accuracy: {best_info['accuracy']:.4f}")

Baseline Model loaded successfully.
Baseline Benchmark Metrics: {'accuracy': 0.6608695652173913, 'params': 596225, 'flops': 1000000.0}

--- Initiating Continuous PPO Training (Deep Layer-wise Optimization) ---
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -1.1     |
| time/              |          |
|    fps             | 42       |
|    iterations      | 1        |
|    time_elapsed    | 48       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1          |
|    ep_rew_mean          | -0.465     |
| time/                   |            |
|    fps                  | 41         |
|    iterations           | 2          |
|    time_elapsed         | 98         |
|    total_timesteps      |

In [10]:
# 5. Save for Final Evaluation
with open(Config.BEST_ACTION_SAVE_PATH, 'w') as f:
    json.dump({
        "gru_l0_rate": float(best_info['gru_l0']),
        "gru_l1_rate": float(best_info['gru_l1']),
        "linear_pruning_rate": float(best_info['linear']),
        "quantization": bool(best_info['quant'])
    }, f)
print(f"Optimal policy parameters saved to {Config.BEST_ACTION_SAVE_PATH}")

Optimal policy parameters saved to /kaggle/working/best_action_expanded.json
