In [None]:
from IPython import get_ipython
import random
import matplotlib.pyplot as plt
from IPython import display
from tqdm.notebook import tqdm
from dynamic_env import TaskEnv_drift
from typing import Tuple, List
import itertools as it
import numpy as np
import pandas as pd
import plotly.express as px
import statistics as s


import torch
import torch.nn as nn
import torch.optim as optim
import gymnasium as gym
from collections import deque
import ast



In [3]:
env = TaskEnv_drift()

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  frequencies[label][action] = ast.literal_eval(frequencies[label][action]) #判断需要计算的内容是不是合法的Python类型，如果是则执行，否则就报错


In [4]:
def drift_control(add_states=0,add_actions=0, type_drif=None): #add other variable to control the type
    """which episode drift happen and which type""" 
    env.set_flag()
    env.drift(add_actions,add_states)
    #qlearner.change_qtable()
    return

In [None]:
# 定义 State-Action Embedding 和 Q-Network
class StateActionEmbedding(nn.Module):
    def __init__(self, state_dim, action_dim, embedding_dim=16):
        super(StateActionEmbedding, self).__init__()
        self.state_embedding = nn.Sequential(
            nn.Linear(state_dim, 32),
            nn.ReLU(),
            nn.Linear(32, embedding_dim)
        )
        self.action_embedding = nn.Sequential(
            nn.Linear(action_dim, 32),
            nn.ReLU(),
            nn.Linear(32, embedding_dim)
        )

    def forward(self, state, action):
        state_embed = self.state_embedding(state)
        action_embed = self.action_embedding(action)
        return torch.cat([state_embed, action_embed], dim=-1)

class QNetwork(nn.Module):
    def __init__(self, embedding_dim, hidden_dim=64):
        super(QNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, state_action_embed):
        return self.fc(state_action_embed)

# 定义 DQN Agent
class DQNAgent:
    def __init__(self, state_dim, action_dim, embedding_dim=16, gamma=0.9, lr=1e-3, epsilon=0.2, epsilon_decay=0.995, epsilon_min=0.01):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min

        self.losses = []
        self.episode_rewards = []

        # 网络
        self.embedding_net = StateActionEmbedding(state_dim, action_dim, embedding_dim)
        self.q_net = QNetwork(embedding_dim)
        self.target_q_net = QNetwork(embedding_dim)
        self.target_q_net.load_state_dict(self.q_net.state_dict())

        # 优化器
        self.optimizer = optim.Adam(list(self.embedding_net.parameters()) + list(self.q_net.parameters()), lr=lr)

        # 经验回放
        self.memory = deque(maxlen=100)

    def get_action(self, state, state_to_index):
        if np.random.rand() < self.epsilon:
            return random.randint(0, self.action_dim - 1)  # 随机动作
        else:
            state_index = state_to_index[state]  # 将 state 转换为索引
            state_one_hot = torch.zeros(self.state_dim).unsqueeze(0)
            state_one_hot[0, state_index] = 1  # 转换为 one-hot 编码
            q_values = []
            for action in range(self.action_dim):
                action_one_hot = torch.zeros(self.action_dim).unsqueeze(0)
                action_one_hot[0, action] = 1
                state_action_embed = self.embedding_net(state_one_hot, action_one_hot)
                q_value = self.q_net(state_action_embed)
                q_values.append(q_value.item())
            return np.argmax(q_values)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size=1, state_to_index=None):
        if len(self.memory) < batch_size:
            return

        # 从经验回放中采样一个样本（batch_size=1）
        state, action, reward, next_state, done = self.memory[-1]

        # 将 state 和 next_state 转换为 one-hot 编码
        state_index = state_to_index[state]
        state_one_hot = torch.zeros(self.state_dim).unsqueeze(0)
        state_one_hot[0, state_index] = 1

        next_state_index = state_to_index[next_state]
        next_state_one_hot = torch.zeros(self.state_dim).unsqueeze(0)
        next_state_one_hot[0, next_state_index] = 1

        # 将 action 转换为 one-hot 编码
        action_one_hot = torch.zeros(self.action_dim).unsqueeze(0)
        action_one_hot[0, action] = 1

        reward = torch.FloatTensor([reward])
        done = torch.FloatTensor([done])

        # 计算当前 Q 值
        state_action_embed = self.embedding_net(state_one_hot, action_one_hot)
        current_q = self.q_net(state_action_embed)

        # 计算目标 Q 值
        with torch.no_grad():
            next_q_values = []
            for next_action in range(self.action_dim):
                next_action_one_hot = torch.zeros(self.action_dim).unsqueeze(0)
                next_action_one_hot[0, next_action] = 1
                next_state_action_embed = self.embedding_net(next_state_one_hot, next_action_one_hot)
                next_q = self.target_q_net(next_state_action_embed)
                next_q_values.append(next_q.item())
            next_q_max = max(next_q_values)
            target_q = reward + (1 - done) * self.gamma * next_q_max

        # 计算损失并更新网络
        loss = nn.MSELoss()(current_q, target_q.unsqueeze(1))
        self.losses.append(loss.item())
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # 更新 epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        # 更新目标网络
    def plot_training_progress(self):
        """绘制训练过程中的reward变化"""
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
        
        # 绘制episode rewards
        ax1.plot(self.episode_rewards, label='Episode Reward', alpha=0.6)
        ax1.set_title('Training Rewards')
        ax1.set_xlabel('Episode')
        ax1.set_ylabel('Reward')
        ax1.legend()
        ax1.grid(True)
        
        # 绘制损失变化
        ax2.plot(self.losses, label='Loss', alpha=0.6)
        ax2.set_title('Training Loss')
        ax2.set_xlabel('Training Step')
        ax2.set_ylabel('Loss')
        ax2.legend()
        ax2.grid(True)
        
        plt.tight_layout()
        plt.show()




In [6]:
env = TaskEnv_drift()  
state_dim = len(env.states) 
action_dim = len(env.motions)  

# 创建 state 到索引的映射
state_to_index = {state: idx for idx, state in enumerate(env.states)}
agent = DQNAgent(state_dim, action_dim)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  frequencies[label][action] = ast.literal_eval(frequencies[label][action]) #判断需要计算的内容是不是合法的Python类型，如果是则执行，否则就报错


In [None]:
def run_agent(num_episodes, drift_ep=0):
    for episode in range(num_episodes):
        state = env.reset()
        total_reward = 0

        if drift_ep != 0 and episode == drift_ep: #drift at 
            drift_control(add_states=0,add_actions=0)
         
        while True:
            action = agent.get_action(state, state_to_index)
            next_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            agent.train(batch_size=1, state_to_index=state_to_index)
            total_reward += reward
            state = next_state   
        
            if done:
                break
        agent.episode_rewards.append(total_reward)
    return agent.episode_rewards


In [10]:
#previous env, only simple sudden fridt
runs = 5 #repeat trainning process
episodes = 1000
drift_ep = 500
DQag_drift = np.empty((episodes, runs))
#agent1 = run_qlearner(episodesT, 0.2, epsilon=0.1, alpha=0.2)[0]
DQag = np.empty((episodes, runs))
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i] = run_agent(episodes,drift_ep = 0)
    
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i] = run_agent(episodes,drift_ep)



data_eps = {'without': np.apply_along_axis(np.mean, 1, DQag), 
            'drift': np.apply_along_axis(np.mean, 1, DQag_drift)}

df_eps = pd.DataFrame(data=data_eps)
#print(df_eps.shape)
#print(data_eps)
fug = px.line(data_eps, y=['without','drift'])
fug.update_layout(xaxis_title = 'Episodes', yaxis_title = 'Average sum of reward')
fug.show()

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

drift happen
drift happen
drift happen
drift happen
drift happen


In [32]:
import inspect
import dyna_env_drifttype
# 显示模块中的所有成员
#print(dir(dyna_env_drifttype))
from dyna_env_drifttype import TaskEnv_driftype
env_dt = TaskEnv_driftype()

state_dim = len(env_dt.states) 
action_dim = len(env_dt.motions)  
# 创建 state 到索引的映射
state_to_index = {state: idx for idx, state in enumerate(env_dt.states)}
agent = DQNAgent(state_dim, action_dim)


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




In [None]:
def run_agent_dt(apply_drift=False,add_actions=0, change_at_states=['va', 'po', 'sib', 'pp'], drift_dis_type='inverse',intensity = 0.5,drift_type='sudden'):
    #new run agent method for drift type env
    num_episodes = 1000
    drift_ep=500
    env_dt = TaskEnv_driftype()
    agent = DQNAgent(state_dim, action_dim)


    for episode in range(num_episodes):
        state = env_dt.reset()
        total_reward = 0
        
        if apply_drift and episode == drift_ep:  
            print(f"Drift applied at episode {episode}")
            env_dt.set_flag()
            env_dt.drift(add_actions, change_at_states, drift_dis_type,intensity, drift_type)  # 仅改变转移概率
        
        state = env_dt.reset()
        done = False

        while True:
            action = agent.get_action(state, state_to_index)
            next_state, reward, done, _ = env_dt.step(action)
            agent.remember(state, action, reward, next_state, done)
            agent.train(batch_size=1, state_to_index=state_to_index)
            total_reward += reward
            state = next_state   
        
            if done:
                break

        agent.episode_rewards.append(total_reward)

    return agent.episode_rewards

In [34]:

runs = 5 #repeat trainning process
episodes = 1000
drift_ep = 500
DQag_drift = np.empty((episodes, runs))
DQag = np.empty((episodes, runs))
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i] = run_agent_dt(apply_drift=False)
    
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i] = run_agent_dt(apply_drift=True)



data_eps = {'without': np.apply_along_axis(np.mean, 1, DQag), 
            'drift': np.apply_along_axis(np.mean, 1, DQag_drift)}

df_eps = pd.DataFrame(data=data_eps)
#print(df_eps.shape)
#print(data_eps)
fug = px.line(data_eps, y=['without','drift'])
fug.update_layout(xaxis_title = 'Episodes', yaxis_title = 'Average sum of reward')
fug.show()

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the defaul

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob

In [35]:
runs = 5 #repeat trainning process
episodes = 1000
drift_ep = 500
DQag_drift = np.empty((episodes, runs))
DQag = np.empty((episodes, runs))
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag[:, i] = run_agent_dt(apply_drift=False)
    
for i in tqdm(range(runs)):
    agent = DQNAgent(state_dim, action_dim)
    DQag_drift[:, i] = run_agent_dt(apply_drift=True,drift_dis_type='similar')



data_eps = {'without': np.apply_along_axis(np.mean, 1, DQag), 
            'drift': np.apply_along_axis(np.mean, 1, DQag_drift)}

df_eps = pd.DataFrame(data=data_eps)
#print(df_eps.shape)
#print(data_eps)
fug = px.line(data_eps, y=['without','drift'])
fug.update_layout(xaxis_title = 'Episodes', yaxis_title = 'Average sum of reward')
fug.show()

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the defaul

  0%|          | 0/5 [00:00<?, ?it/s]


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Drift applied at episode 500
drift happen
1.0 [0.01603948 0.14497224 0.57001851 0.24676126 0.02220851]
new prob 1.0
1.0 [0.13498952 0.57497755 0.2481293  0.01945525 0.02244837]
new prob 1.0
1.0 [0.59183673 0.18367347 0.1377551  0.05612245 0.03061224]
new prob 1.0
1.0 [0.17118402 0.5149786  0.26248217 0.0256776  0.0256776 ]
new prob 1.0
1.0 [0.54969749 0.25842697 0.0207433  0.14952463 0.02160761]
new prob 0.9999999999999999
1.0000000000000002 [0.21645022 0.16883117 0.54978355 0.03463203 0.03030303]
new prob 1.0
0.9999999999999999 [0.44444444 0.00854701 0.37606838 0.06837607 0.1025641 ]
new prob 1.0
1.0 [0.18297872 0.14893617 0.14893617 0.49787234 0.0212766 ]
new prob 1.0
1.0000000000000002 [0.51951548 0.21265141 0.03633917 0.08748318 0.14401077]
new prob 1.0
1.0 [0.51898734 0.07594937 0.08860759 0.21518987 0.10126582]
new prob 1.0
1.0 [0.49152542 0.0720339  0.19915254 0.19915254 0.03813559]
new prob 1.0
0.9999999999999999 [0.49450549 0.1978022  0.13186813 0.13736264 0.03846154]
new prob