In [1]:
from IPython import get_ipython
import random
import matplotlib.pyplot as plt
from IPython import display
from tqdm.notebook import tqdm
from dynamic_env import TaskEnv_drift
from typing import Tuple, List
import itertools as it
import numpy as np
import pandas as pd
import plotly.express as px
import statistics as s


import torch
import torch.nn as nn
import torch.optim as optim
import gymnasium as gym
from collections import deque
import ast



In [None]:
# Module-level environment instance; drift_control() defined below operates on this global.
env = TaskEnv_drift()

In [None]:
def drift_control(add_states=0, add_actions=0, type_drif=None):
    """Trigger a drift on the module-level ``env``.

    Raises the environment's drift flag via ``env.set_flag()`` and then calls
    ``env.drift`` with the requested counts.

    Args:
        add_states: forwarded as the second positional argument of ``env.drift``.
        add_actions: forwarded as the first positional argument of ``env.drift``.
        type_drif: currently unused; placeholder for selecting the drift type.

    Returns:
        None.
    """
    # Order matters: add_actions is passed first, add_states second.
    # NOTE(review): confirm this matches the parameter order of TaskEnv_drift.drift.
    drift_args = (add_actions, add_states)
    env.set_flag()
    env.drift(*drift_args)
    return None

In [2]:
# Define the state-action embedding and the Q-network
class StateActionEmbedding(nn.Module):
    """Embed a state vector and an action vector and concatenate the two embeddings.

    Each input goes through its own two-layer MLP (input -> 32 -> ``embedding_dim``
    with a ReLU in between). The forward pass returns the state embedding followed
    by the action embedding along the last dimension, so the output has
    ``2 * embedding_dim`` features.

    Args:
        state_dim: number of input features of the state vector.
        action_dim: number of input features of the action vector.
        embedding_dim: size of each individual embedding.
    """

    def __init__(self, state_dim, action_dim, embedding_dim=16):
        super().__init__()
        hidden_width = 32
        # The state encoder is built first so that parameter registration order
        # (and therefore the state_dict layout) stays the same.
        self.state_embedding = self._make_encoder(state_dim, hidden_width, embedding_dim)
        self.action_embedding = self._make_encoder(action_dim, hidden_width, embedding_dim)

    @staticmethod
    def _make_encoder(in_features, hidden_features, out_features):
        """Build one Linear -> ReLU -> Linear encoder."""
        return nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, state, action):
        """Return ``cat([embed(state), embed(action)], dim=-1)``."""
        encoded = (self.state_embedding(state), self.action_embedding(action))
        return torch.cat(encoded, dim=-1)

class QNetwork(nn.Module):
    """MLP head that maps a concatenated state-action embedding to a single Q-value.

    Args:
        embedding_dim: size of ONE embedding; the expected input width is
            ``2 * embedding_dim`` (state embedding followed by action embedding).
        hidden_dim: width of the hidden layer.
    """

    def __init__(self, embedding_dim, hidden_dim=64):
        super().__init__()
        joint_dim = embedding_dim * 2
        layers = [
            nn.Linear(joint_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, state_action_embed):
        """Return the Q-value(s); the last dimension of the output has size 1."""
        q_value = self.fc(state_action_embed)
        return q_value

# Define the DQN agent
class DQNAgent:
    """Epsilon-greedy DQN agent over one-hot encoded discrete states and actions.

    Q(s, a) is obtained by embedding the one-hot state and one-hot action with
    ``StateActionEmbedding`` and passing the concatenated embedding through a
    ``QNetwork``. A second ``QNetwork`` (``target_q_net``) provides the bootstrap
    target and is re-synchronised with the online network every
    ``target_update_freq`` training steps.

    Note:
        Only the Q head has a target copy; target values are still computed
        through the online ``embedding_net``.

    Args:
        state_dim: number of distinct states (length of the state one-hot vector).
        action_dim: number of distinct actions (length of the action one-hot vector).
        embedding_dim: size of each of the two embeddings.
        gamma: discount factor.
        lr: Adam learning rate.
        epsilon: initial exploration probability.
        epsilon_decay: multiplicative decay applied to ``epsilon`` after every
            training step.
        epsilon_min: lower bound for ``epsilon``.
        target_update_freq: copy the online Q-network into the target network
            every this many training steps. ``None`` or ``0`` disables the sync
            (the target then keeps its initial weights).

    Attributes:
        losses: loss value of every training step, in order.
        episode_rewards: list the training loops append per-episode returns to.
        memory: bounded buffer (max 100) of ``(state, action, reward, next_state, done)``.
        train_steps: number of optimisation steps performed so far.
    """

    def __init__(self, state_dim, action_dim, embedding_dim=16, gamma=0.9, lr=1e-3, epsilon=0.2,
                 epsilon_decay=0.995, epsilon_min=0.01, target_update_freq=100):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.target_update_freq = target_update_freq
        self.train_steps = 0

        self.losses = []
        self.episode_rewards = []

        # Networks: the target head starts as an exact copy of the online head.
        self.embedding_net = StateActionEmbedding(state_dim, action_dim, embedding_dim)
        self.q_net = QNetwork(embedding_dim)
        self.target_q_net = QNetwork(embedding_dim)
        self.target_q_net.load_state_dict(self.q_net.state_dict())

        # Optimizer: trains the embedding and the online Q head jointly.
        self.optimizer = optim.Adam(list(self.embedding_net.parameters()) + list(self.q_net.parameters()), lr=lr)

        # Experience buffer
        self.memory = deque(maxlen=100)

    @staticmethod
    def _one_hot(index, size):
        """Return a ``(1, size)`` float tensor with a single 1 at ``index``."""
        vec = torch.zeros(size).unsqueeze(0)
        vec[0, index] = 1
        return vec

    def _all_action_q_values(self, state_one_hot, q_net):
        """Evaluate ``q_net`` for every action in the given state in one batched pass.

        Args:
            state_one_hot: ``(1, state_dim)`` one-hot state tensor.
            q_net: the Q head to evaluate (online or target).

        Returns:
            1-D tensor of length ``action_dim`` with Q(state, a) for a = 0..action_dim-1.
        """
        states = state_one_hot.expand(self.action_dim, -1)
        actions = torch.eye(self.action_dim)  # row a is the one-hot encoding of action a
        return q_net(self.embedding_net(states, actions)).squeeze(-1)

    def get_action(self, state, state_to_index):
        """Pick an action epsilon-greedily.

        Args:
            state: environment state; must be a key of ``state_to_index``.
            state_to_index: mapping from state to its one-hot index.

        Returns:
            int action index in ``[0, action_dim)``.
        """
        if np.random.rand() < self.epsilon:
            return random.randint(0, self.action_dim - 1)  # random exploratory action

        state_one_hot = self._one_hot(state_to_index[state], self.state_dim)
        # Pure inference: no autograd graph is needed for action selection.
        with torch.no_grad():
            q_values = self._all_action_q_values(state_one_hot, self.q_net)
        # np.argmax returns the first maximum, so ties resolve to the lowest index.
        return int(np.argmax(q_values.tolist()))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the experience buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size=1, state_to_index=None):
        """Run one TD update on the most recent transition.

        ``batch_size`` only acts as a warm-up threshold: nothing happens until the
        buffer holds at least that many transitions. The update itself always uses
        the single newest transition (``self.memory[-1]``).

        Args:
            batch_size: minimum buffer size required before training.
            state_to_index: mapping from state to its one-hot index (required in
                practice; the states of the transition are looked up in it).
        """
        # Also guards the empty buffer, which batch_size <= 0 would otherwise let through.
        if not self.memory or len(self.memory) < batch_size:
            return

        state, action, reward, next_state, done = self.memory[-1]

        # One-hot encode state, next state and action.
        state_one_hot = self._one_hot(state_to_index[state], self.state_dim)
        next_state_one_hot = self._one_hot(state_to_index[next_state], self.state_dim)
        action_one_hot = self._one_hot(action, self.action_dim)

        reward = torch.FloatTensor([reward])
        done = torch.FloatTensor([done])

        # Current Q estimate for the taken action, shape (1, 1).
        current_q = self.q_net(self.embedding_net(state_one_hot, action_one_hot))

        # Bootstrap target: r + gamma * max_a' Q_target(s', a'), zeroed on terminal steps.
        with torch.no_grad():
            next_q_max = self._all_action_q_values(next_state_one_hot, self.target_q_net).max()
            target_q = reward + (1 - done) * self.gamma * next_q_max

        # Loss and parameter update.
        loss = nn.MSELoss()(current_q, target_q.unsqueeze(1))
        self.losses.append(loss.item())

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay exploration after every training step.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        # Periodically refresh the target network from the online network.
        self.train_steps += 1
        if self.target_update_freq and self.train_steps % self.target_update_freq == 0:
            self.target_q_net.load_state_dict(self.q_net.state_dict())

    def plot_training_progress(self):
        """Plot per-episode rewards and per-step training loss in two stacked panels."""
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

        # Episode rewards
        ax1.plot(self.episode_rewards, label='Episode Reward', alpha=0.6)
        ax1.set_title('Training Rewards')
        ax1.set_xlabel('Episode')
        ax1.set_ylabel('Reward')
        ax1.legend()
        ax1.grid(True)

        # Training loss
        ax2.plot(self.losses, label='Loss', alpha=0.6)
        ax2.set_title('Training Loss')
        ax2.set_xlabel('Training Step')
        ax2.set_ylabel('Loss')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()




In [3]:
# Re-creates the global `env` (replacing any instance bound by an earlier cell) and derives the problem size from it.
env = TaskEnv_drift()  
state_dim = len(env.states) 
action_dim = len(env.motions)  

# Build the state -> index mapping used to one-hot encode states
state_to_index = {state: idx for idx, state in enumerate(env.states)}
agent = DQNAgent(state_dim, action_dim)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  frequencies[label][action] = ast.literal_eval(frequencies[label][action]) #判断需要计算的内容是不是合法的Python类型，如果是则执行，否则就报错


In [None]:
def run_agent(num_episodes, drift_ep=0):
    """Train the global ``agent`` on the global ``env`` for ``num_episodes`` episodes.

    Args:
        num_episodes: number of episodes to run.
        drift_ep: episode index at which ``drift_control`` is invoked; ``0``
            means no drift is ever triggered.

    Returns:
        ``agent.episode_rewards`` (the agent's own list, not a copy), with one
        summed reward appended per episode.
    """
    drift_enabled = drift_ep != 0

    for episode in range(num_episodes):
        state = env.reset()
        episode_return = 0

        # The drift is triggered after the reset of the chosen episode.
        if drift_enabled and episode == drift_ep:
            drift_control(add_states=0, add_actions=0)

        done = False
        while not done:
            action = agent.get_action(state, state_to_index)
            next_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            agent.train(batch_size=1, state_to_index=state_to_index)
            episode_return += reward
            state = next_state

        agent.episode_rewards.append(episode_return)

    return agent.episode_rewards


In [None]:
# Experiment on the earlier environment: simple sudden drift only.
runs = 5  # number of independent training repetitions
episodes = 1000
drift_ep = 200

# One row per episode, one column per run; filled with the per-episode returns.
DQag_drift = np.empty((episodes, runs))
DQag = np.empty((episodes, runs))
# Baseline: drift_ep = 0 disables the drift inside run_agent.
for i in tqdm(range(runs)):
    # run_agent reads the global `agent`, so a fresh one is bound before each run.
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i] = run_agent(episodes,drift_ep = 0)
    
# Drift condition: drift_control() is triggered at episode `drift_ep` of every run.
# NOTE(review): the global `env` is shared by all runs and is not re-created here —
# confirm whether a drift applied in one run persists into the following runs.
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i] = run_agent(episodes,drift_ep)




In [None]:
# Display the environment's `episode_actions` attribute (presumably the recorded action log — confirm in dynamic_env).
env.episode_actions

In [None]:

# Mean return of each episode across the independent runs (axis 1 indexes the runs).
data_eps = {'without': DQag.mean(axis=1),
            'drift': DQag_drift.mean(axis=1)}

df_eps = pd.DataFrame(data=data_eps)
# Plot from the DataFrame, like the later plotting cells do.
fug = px.line(df_eps, y=['without', 'drift'], title='DQN with sudden drift')
fug.update_layout(xaxis_title='Episodes', yaxis_title='Average sum of reward')
fug.show()

In [4]:
import inspect
import dyna_env_drifttype
# Show all members of the module
#print(dir(dyna_env_drifttype))
# Author's note: info = True can output the trajectory
from dyna_env_drifttype import TaskEnv_driftype
env_dt = TaskEnv_driftype()

# NOTE: rebinds the globals state_dim / action_dim / state_to_index / agent,
# now derived from the drift-type environment.
state_dim = len(env_dt.states) 
action_dim = len(env_dt.motions)  
# Build the state -> index mapping used to one-hot encode states
state_to_index = {state: idx for idx, state in enumerate(env_dt.states)}
agent = DQNAgent(state_dim, action_dim)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  frequencies[label][action] = ast.literal_eval(frequencies[label][action]) #判断需要计算的内容是不是合法的Python类型，如果是则执行，否则就报错


In [22]:
def run_agent_dt(apply_drift=False, add_actions=0, change_at_states=None, drift_dis_type='inverse',
                 intensity=0.5, drift_type='sudden', num_episodes=1000, drift_ep=300):
    """Train a fresh DQN agent on a fresh ``TaskEnv_driftype`` environment.

    Relies on the module-level ``state_dim``, ``action_dim`` and
    ``state_to_index``.

    Args:
        apply_drift: when true, a drift is applied at episode ``drift_ep``.
        add_actions: forwarded to ``env.drift``.
        change_at_states: states passed to ``env.drift``; ``None`` means
            ``['sib', 'pp']`` (a new list per call, so nothing is shared
            between calls).
        drift_dis_type: forwarded to ``env.drift``.
        intensity: forwarded to ``env.drift``.
        drift_type: forwarded to ``env.drift``.
        num_episodes: number of training episodes.
        drift_ep: episode index at which the drift is applied.

    Returns:
        Tuple ``(episode_rewards, step_counts, information)``: the summed
        reward per episode, the number of steps per episode, and a dict
        counting how often each non-empty ``info`` value (its items joined
        into one string) was returned by ``env.step``.
    """
    if change_at_states is None:
        change_at_states = ['sib', 'pp']

    env_dt = TaskEnv_driftype()
    agent = DQNAgent(state_dim, action_dim)
    step_counts = []
    information = {}

    for episode in range(num_episodes):
        # NOTE(review): the environment is reset both before and after the
        # optional drift, as in the original code — confirm whether the first
        # reset is required by TaskEnv_driftype.
        state = env_dt.reset()
        total_reward = 0
        step = 0

        if apply_drift and episode == drift_ep:
            print(f"Drift applied at episode {episode}")
            env_dt.set_flag()
            # Per the original author's note, this only changes the transition probabilities.
            env_dt.drift(add_actions, change_at_states, drift_dis_type, intensity, drift_type)

        state = env_dt.reset()
        done = False

        while not done:
            step += 1
            action = agent.get_action(state, state_to_index)
            next_state, reward, done, info = env_dt.step(action)
            # Count every distinct non-empty info payload.
            if info:
                key = ''.join(str(x) for x in info)
                information[key] = information.get(key, 0) + 1
            agent.remember(state, action, reward, next_state, done)
            agent.train(batch_size=1, state_to_index=state_to_index)
            total_reward += reward
            state = next_state

        agent.episode_rewards.append(total_reward)
        step_counts.append(step)

    return agent.episode_rewards, step_counts, information

In [23]:

runs = 2  # number of independent training repetitions
episodes = 1000
drift_ep = 300
# NOTE(review): run_agent_dt is called without episode arguments below, so `episodes`
# only sizes the result arrays and `drift_ep` is not used in this cell; both must
# match the values run_agent_dt uses on its own (1000 episodes, drift at 300).
DQag_drift = np.empty((episodes, runs))
DQag = np.empty((episodes, runs))
for i in tqdm(range(runs)):
    # NOTE(review): run_agent_dt constructs its own DQNAgent, so this global
    # rebinding does not affect the run.
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i],_,_ = run_agent_dt(apply_drift=False)
    
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i],_,_ = run_agent_dt(apply_drift=True)





  0%|          | 0/2 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the defaul

  0%|          | 0/2 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running


In [None]:
# Mean return of each episode across the independent runs (axis 1 indexes the runs).
data_eps = {'without': DQag.mean(axis=1),
            'drift': DQag_drift.mean(axis=1)}


# Build the DataFrame
df_eps = pd.DataFrame(data=data_eps)

# Rolling mean with a 10-episode window (the first 9 rows are NaN)
df_eps_smooth = df_eps.rolling(window=10).mean()
fug = px.line(df_eps_smooth, y=['without', 'drift'])
fug.update_layout(xaxis_title='Episodes', yaxis_title='Average sum of reward (Smoothed, window=10)')
fug.show()

# Unsmoothed curves
fug = px.line(df_eps, y=['without', 'drift'])
fug.update_layout(xaxis_title='Episodes', yaxis_title='Average sum of reward')
fug.show()




In [25]:
runs = 5  # number of independent training repetitions
episodes = 1000
drift_ep = 300
# NOTE(review): run_agent_dt is called without episode arguments below, so `episodes`
# only sizes the result arrays and `drift_ep` is not used in this cell; both must
# match the values run_agent_dt uses on its own (1000 episodes, drift at 300).
DQag_drift = np.empty((episodes, runs))
DQag = np.empty((episodes, runs))

for i in tqdm(range(runs)):
    # NOTE(review): run_agent_dt constructs its own DQNAgent, so this global
    # rebinding does not affect the run.
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i],_,_ = run_agent_dt(apply_drift=False)

for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i],_,_ = run_agent_dt(apply_drift=True,drift_dis_type='similar')


# Mean return of each episode across the independent runs (axis 1 indexes the runs).
data_eps = {'without': DQag.mean(axis=1),
            'drift': DQag_drift.mean(axis=1)}

df_eps = pd.DataFrame(data=data_eps)
# Plot from the DataFrame, like the other plotting cells do.
fug = px.line(df_eps, y=['without', 'drift'])
fug.update_layout(xaxis_title='Episodes', yaxis_title='Average sum of reward')
fug.show()

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the defaul

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 300
drift happen
change_frequencies function running


In [26]:
# Smooth the per-episode averages with a 10-episode rolling mean before plotting
# (with the default min_periods the first 9 rows are NaN).
df_eps_smooth = df_eps.rolling(window=10).mean()
fug = px.line(df_eps_smooth, y=['without', 'drift'])
fug.update_layout(xaxis_title='Episodes', yaxis_title='Average sum of reward (Smoothed, window=10)')
fug.show()

In [None]:
import os
import numpy as np
import pandas as pd
from itertools import cycle
import matplotlib.pyplot as plt
from tqdm import tqdm

class DQNDriftExperimentManager:
    """Manager for DQN experiments under different drift settings.

    Holds a baseline ("no drift") configuration plus a set of drift
    configurations, runs each configuration for several independent trials,
    and plots / exports the per-episode mean and standard deviation of the
    episode rewards.

    Args:
        output_dir: directory for figures and CSV files; created if missing.
    """

    def __init__(self, output_dir="results"):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        self.base_config = {
            'no_drift': {
                'name': 'No Drift',
                'apply_drift': False,
                'color': 'blue'
            }
        }
        # Defined up front so both attributes exist (baseline only) even before
        # define_drift_configs() has been called.
        self.drift_configs = {}
        self.all_configs = dict(self.base_config)

    def define_drift_configs(self, drift_configs=None):
        """Set the drift configurations and rebuild ``all_configs``.

        Args:
            drift_configs: mapping ``key -> config``; ``None`` installs the two
                built-in configurations (random and inverse transition drift).

        Returns:
            The combined dict of the baseline and the drift configurations.
        """
        if drift_configs is None:
            self.drift_configs = {
                'random_transition': {
                    'name': 'Random Transition',
                    'apply_drift': True,
                    'drift_params': {
                        'add_actions': 0,
                        'change_at_states': ['va', 'po', 'sib', 'pp'],
                        'drift_dis_type': "random",
                        'intensity': 1.0,
                        'drift_type': 'sudden'
                    },
                    'color': 'red'
                },
                'inverse_transition': {
                    'name': 'Inverse Transition',
                    'apply_drift': True,
                    'drift_params': {
                        'add_actions': 0,
                        'change_at_states': ['va', 'po', 'sib', 'pp'],
                        'drift_dis_type': "inverse",
                        'intensity': 0.8,
                        'drift_type': 'sudden'
                    },
                    'color': 'purple'
                }
            }
        else:
            self.drift_configs = drift_configs

        self.all_configs = {**self.base_config, **self.drift_configs}
        return self.all_configs

    def run_single_experiment(self, config, total_episodes=1000, drift_episode=300):
        """Train one fresh agent on one fresh environment.

        Args:
            config: experiment configuration; ``apply_drift`` (optional) and,
                when it is true, ``drift_params`` (keyword arguments for
                ``env.drift``) are read from it.
            total_episodes: number of training episodes.
            drift_episode: episode index at which the drift is applied.

        Returns:
            Tuple ``(rewards, step_counts, information)``: summed reward per
            episode, steps per episode, and a dict counting each non-empty
            ``info`` value returned by ``env.step`` (items joined into a string).
        """
        env = TaskEnv_driftype()
        # Sizes and the state -> index mapping come from this environment's own
        # `states` / `motions`, the same way the notebook's setup cells derive
        # them, so the method does not depend on a global `state_to_index`.
        states = list(env.states)
        state_to_index = {state: idx for idx, state in enumerate(states)}
        state_dim = len(states)
        action_dim = len(env.motions)

        agent = DQNAgent(state_dim, action_dim)
        rewards = []
        step_counts = []
        information = {}

        for episode in range(total_episodes):
            # The drift is applied before the reset of the chosen episode.
            if config.get('apply_drift', False) and episode == drift_episode:
                print(f"Applying drift at episode {episode}")
                env.set_flag()
                env.drift(**config['drift_params'])

            state = env.reset()
            done = False
            episode_reward = 0
            step = 0

            while not done:
                step += 1
                action = agent.get_action(state, state_to_index)
                next_state, reward, done, info = env.step(action)
                if info:
                    key = ''.join(map(str, info))
                    information[key] = information.get(key, 0) + 1

                agent.remember(state, action, reward, next_state, done)
                agent.train(batch_size=1, state_to_index=state_to_index)

                episode_reward += reward
                state = next_state

            agent.episode_rewards.append(episode_reward)
            rewards.append(episode_reward)
            step_counts.append(step)

        return rewards, step_counts, information

    def run_multiple_trials(self, config, num_runs=5, total_episodes=1000, drift_episode=300):
        """Repeat ``run_single_experiment`` and aggregate over the runs.

        Returns:
            Tuple ``(avg_rewards, std_rewards)``: per-episode mean and standard
            deviation across the ``num_runs`` runs.
        """
        all_rewards = []
        for _ in tqdm(range(num_runs), desc=f"Running {config['name']}"):
            rewards, _, _ = self.run_single_experiment(config, total_episodes, drift_episode)
            all_rewards.append(rewards)

        avg_rewards = np.mean(all_rewards, axis=0)
        std_rewards = np.std(all_rewards, axis=0)
        return avg_rewards, std_rewards

    def run_experiments(self, configs, num_runs=5, total_episodes=1000, drift_episode=300):
        """Run every configuration in ``configs``.

        Returns:
            Dict ``key -> {'avg', 'std', 'color', 'name'}`` with the aggregated
            curves and the plotting metadata of each configuration.
        """
        results = {}
        for name, cfg in configs.items():
            print(f"\nRunning config: {cfg['name']}")
            avg, std = self.run_multiple_trials(cfg, num_runs, total_episodes, drift_episode)
            results[name] = {
                'avg': avg,
                'std': std,
                'color': cfg['color'],
                'name': cfg['name']
            }
        return results

    def visualize_results(self, results, title, filename, drift_episode=300):
        """Plot mean +/- std curves for all configurations and save the figure.

        Args:
            results: dict as returned by ``run_experiments``.
            title: figure title.
            filename: file name (inside ``output_dir``) the figure is saved to.
            drift_episode: x position of the vertical drift marker.
        """
        plt.figure(figsize=(12, 6))
        line_styles = cycle(['-', '--', '-.', ':'])

        for data in results.values():
            avg, std = data['avg'], data['std']
            plt.plot(avg, label=data['name'], color=data['color'], linestyle=next(line_styles))
            plt.fill_between(range(len(avg)), avg - std, avg + std,
                             alpha=0.15, color=data['color'])

        plt.axvline(x=drift_episode, color='gray', linestyle='--')
        if results:
            # Anchor the label at the lowest mean over ALL curves, not only the
            # last one plotted; with no curves there is nothing to anchor to.
            lowest_avg = min(float(np.min(data['avg'])) for data in results.values())
            plt.text(drift_episode + 5, lowest_avg, 'Drift Point', color='gray')
        plt.title(title)
        plt.xlabel("Episodes")
        plt.ylabel("Average Reward")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(self.output_dir, filename))
        plt.show()

    def export_results_to_csv(self, results, filename):
        """Write the mean and std curves of all configurations to one CSV file.

        Columns are ``<name>`` for the means followed by ``<name>_std`` for the
        standard deviations; the row index (episode number) is included.
        """
        df_avg = pd.DataFrame({data['name']: data['avg'] for data in results.values()})
        df_std = pd.DataFrame({f"{data['name']}_std": data['std'] for data in results.values()})
        df_combined = pd.concat([df_avg, df_std], axis=1)
        df_combined.to_csv(os.path.join(self.output_dir, filename), index=True)

    def run_all_experiments(self):
        """Run the built-in configurations, then plot and export the results."""
        print("=== Running DQN Drift Experiments ===")
        self.define_drift_configs()
        results = self.run_experiments(self.all_configs)
        self.visualize_results(results, "DQN Performance Under Drift", "dqn_drift_comparison.png")
        self.export_results_to_csv(results, "dqn_drift_results.csv")


In [None]:
# Entry point: builds a manager that writes into "dqn_drift_results" and runs the full experiment suite.
# In a notebook kernel __name__ is "__main__", so this guard passes whenever the cell is executed.
if __name__ == "__main__":
    manager = DQNDriftExperimentManager(output_dir="dqn_drift_results")
    manager.run_all_experiments()
