# Notebook

## 一些关于Dist

In [25]:
import torch
from torch.distributions import Normal

In [26]:
dist=Normal(loc=0,scale=2)

In [27]:
action=dist.sample()

In [28]:
entropy=dist.entropy()

In [31]:
dist.cdf(torch.tensor(1))

tensor(0.6915)

In [48]:
import torch
import torch.nn as nn
from torch.distributions import Categorical

# 定义 Actor 网络
class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_dim, 64)
        self.fc2 = nn.Linear(64, action_dim)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        x = self.fc2(x)
        logits=torch.softmax(x,dim=-1)
        print(x,logits)
        dist = Categorical(logits=logits)
        return dist

# 创建 Actor 网络
actor = Actor(10, 4)

# 从 Actor 网络生成分布
state = torch.rand(1, 10)
dist = actor(state)

# 从分布中采样一个动作
action = dist.sample()

# 计算分布的熵
entropy = dist.entropy()


tensor([[-0.0560,  0.0762,  0.0540,  0.2109]], grad_fn=<AddmmBackward0>) tensor([[0.2191, 0.2501, 0.2446, 0.2862]], grad_fn=<SoftmaxBackward0>)


In [19]:
import torch
import torch.nn as nn
import network_sim
import gym

env=gym.make('PccNs-v0')
n_states=env.observation_space.shape[0]

# 定义模块列表
modules = [
    nn.Linear(n_states, 10,dtype=float),  # 输入大小为 1，输出大小为 10 的线性层
    nn.ReLU(),  # ReLU 激活函数
    nn.Linear(10, 1,dtype=float)  # 输入大小为 10，输出大小为 1 的线性层
]

# 创建顺序模型
model = nn.Sequential(*modules)

# 输入数据
state=env.reset()
input = torch.from_numpy(state) # 形状为 (批次大小, 输入大小) 的张量

# 前向传递
output = model(input)  # 形状为 (批次大小, 输出大小) 的张量


History length: 10
Features: ['sent latency inflation', 'latency ratio', 'send ratio']
Getting min obs for ['sent latency inflation', 'latency ratio', 'send ratio']
Reward: 0.00, Ewma Reward: 0.00




In [18]:
output

tensor([-0.2016], dtype=torch.float64, grad_fn=<ViewBackward0>)

## 一些关于SB3

In [2]:
import gymnasium as gym

from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement

# Separate evaluation env
eval_env = gym.make("Pendulum-v1")
# Stop training if there is no improvement after more than 3 evaluations
stop_train_callback = StopTrainingOnNoModelImprovement(max_no_improvement_evals=3, min_evals=5, verbose=1)
eval_callback = EvalCallback(eval_env, eval_freq=1000, callback_after_eval=stop_train_callback, verbose=1)

model = SAC("MlpPolicy", "Pendulum-v1", learning_rate=1e-3, verbose=1)
# Almost infinite number of timesteps, but the training will stop early
# as soon as the the number of consecutive evaluations without model
# improvement is greater than 3
model.learn(int(1e10), callback=eval_callback,progress_bar=True)

Output()

Using cuda device
Creating environment from the given name 'Pendulum-v1'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.68e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 245       |
|    time_elapsed    | 3         |
|    total_timesteps | 800       |
| train/             |           |
|    actor_loss      | 30.2      |
|    critic_loss     | 0.0579    |
|    ent_coef        | 0.502     |
|    ent_coef_loss   | -1.04     |
|    learning_rate   | 0.001     |
|    n_updates       | 699       |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 200       |
|    mean_reward     | -1.71e+03 |
| time/              |           |
|    total_timesteps | 1000      |
| train/             |           |
|    actor_loss      | 37.7      |
|    critic_loss     | 0.0411    |
|    ent_coef        | 0.414     |
|    ent_coef_loss   | -1.36     |
|    learning_rate   | 0.001     |
|    n_updates       | 899       |
----------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -1.6e+03 |
| time/              |          |
|    episodes        | 8        |
|    fps             | 223      |
|    time_elapsed    | 7        |
|    total_timesteps | 1600     |
| train/             |          |
|    actor_loss      | 56.5     |
|    critic_loss     | 0.0517   |
|    ent_coef        | 0.254    |
|    ent_coef_loss   | -1.06    |
|    learning_rate   | 0.001    |
|    n_updates       | 1499     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -865     |
| time/              |          |
|    total_timesteps | 2000     |
| train/             |          |
|    actor_loss      | 65.8     |
|    critic_loss     | 0.0994   |
|    ent_coef        | 0.215    |
|    ent_coef_loss   | -0.444   |
|    learning_rate   | 0.001    |
|    n_updates       | 1899     |
---------------------------------


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.39e+03 |
| time/              |           |
|    episodes        | 12        |
|    fps             | 214       |
|    time_elapsed    | 11        |
|    total_timesteps | 2400      |
| train/             |           |
|    actor_loss      | 73.1      |
|    critic_loss     | 0.265     |
|    ent_coef        | 0.207     |
|    ent_coef_loss   | -0.0341   |
|    learning_rate   | 0.001     |
|    n_updates       | 2299      |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -617     |
| time/              |          |
|    total_timesteps | 3000     |
| train/             |          |
|    actor_loss      | 80.5     |
|    critic_loss     | 0.53     |
|    ent_coef        | 0.225    |
|    ent_coef_loss   | 0.165    |
|    learning_rate   | 0.001    |
|    n_updates       | 2899     |
---------------------------------


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.18e+03 |
| time/              |           |
|    episodes        | 16        |
|    fps             | 211       |
|    time_elapsed    | 15        |
|    total_timesteps | 3200      |
| train/             |           |
|    actor_loss      | 85.5      |
|    critic_loss     | 0.382     |
|    ent_coef        | 0.231     |
|    ent_coef_loss   | 0.143     |
|    learning_rate   | 0.001     |
|    n_updates       | 3099      |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -152     |
| time/              |          |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | 76.4     |
|    critic_loss     | 0.817    |
|    ent_coef        | 0.221    |
|    ent_coef_loss   | -0.0414  |
|    learning_rate   | 0.001    |
|    n_updates       | 3899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -980     |
| time/              |          |
|    episodes        | 20       |
|    fps             | 210      |
|    time_elapsed    | 19       |
|    total_timesteps | 4000     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -843     |
| time/              |          |
|    episodes        | 24       |
|    fps             | 212      |
|    time_elapsed    | 22       |
|    total_timesteps | 4800     |
| train/             |          |
|    actor_loss      | 81.3     |
|    critic_loss     | 1.65     |
|    ent_coef        | 0.162    |
|    ent_coef_loss   | 0.194    |
|    learning_rate   | 0.001    |
|    n_updates       | 4699     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -99.5    |
| time/              |          |
|    total_timesteps | 5000     |
| train/             |          |
|    actor_loss      | 81.9     |
|    critic_loss     | 2.14     |
|    ent_coef        | 0.151    |
|    ent_coef_loss   | 0.081    |
|    learning_rate   | 0.001    |
|    n_updates       | 4899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -744     |
| time/              |          |
|    episodes        | 28       |
|    fps             | 210      |
|    time_elapsed    | 26       |
|    total_timesteps | 5600     |
| train/             |          |
|    actor_loss      | 76.6     |
|    critic_loss     | 1.56     |
|    ent_coef        | 0.122    |
|    ent_coef_loss   | -0.184   |
|    learning_rate   | 0.001    |
|    n_updates       | 5499     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -150     |
| time/              |          |
|    total_timesteps | 6000     |
| train/             |          |
|    actor_loss      | 78.4     |
|    critic_loss     | 1.57     |
|    ent_coef        | 0.108    |
|    ent_coef_loss   | -0.186   |
|    learning_rate   | 0.001    |
|    n_updates       | 5899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -671     |
| time/              |          |
|    episodes        | 32       |
|    fps             | 209      |
|    time_elapsed    | 30       |
|    total_timesteps | 6400     |
| train/             |          |
|    actor_loss      | 77.8     |
|    critic_loss     | 1.03     |
|    ent_coef        | 0.0977   |
|    ent_coef_loss   | 0.229    |
|    learning_rate   | 0.001    |
|    n_updates       | 6299     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -99.2    |
| time/              |          |
|    total_timesteps | 7000     |
| train/             |          |
|    actor_loss      | 73.5     |
|    critic_loss     | 1.89     |
|    ent_coef        | 0.0914   |
|    ent_coef_loss   | -0.25    |
|    learning_rate   | 0.001    |
|    n_updates       | 6899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -607     |
| time/              |          |
|    episodes        | 36       |
|    fps             | 208      |
|    time_elapsed    | 34       |
|    total_timesteps | 7200     |
| train/             |          |
|    actor_loss      | 77.4     |
|    critic_loss     | 1.38     |
|    ent_coef        | 0.0887   |
|    ent_coef_loss   | -0.272   |
|    learning_rate   | 0.001    |
|    n_updates       | 7099     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -143     |
| time/              |          |
|    total_timesteps | 8000     |
| train/             |          |
|    actor_loss      | 65.1     |
|    critic_loss     | 1.45     |
|    ent_coef        | 0.0815   |
|    ent_coef_loss   | -0.0641  |
|    learning_rate   | 0.001    |
|    n_updates       | 7899     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -560     |
| time/              |          |
|    episodes        | 40       |
|    fps             | 208      |
|    time_elapsed    | 38       |
|    total_timesteps | 8000     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -523     |
| time/              |          |
|    episodes        | 44       |
|    fps             | 209      |
|    time_elapsed    | 41       |
|    total_timesteps | 8800     |
| train/             |          |
|    actor_loss      | 71.6     |
|    critic_loss     | 1.71     |
|    ent_coef        | 0.0742   |
|    ent_coef_loss   | 0.426    |
|    learning_rate   | 0.001    |
|    n_updates       | 8699     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -197     |
| time/              |          |
|    total_timesteps | 9000     |
| train/             |          |
|    actor_loss      | 67.4     |
|    critic_loss     | 2.09     |
|    ent_coef        | 0.0728   |
|    ent_coef_loss   | -0.334   |
|    learning_rate   | 0.001    |
|    n_updates       | 8899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -493     |
| time/              |          |
|    episodes        | 48       |
|    fps             | 209      |
|    time_elapsed    | 45       |
|    total_timesteps | 9600     |
| train/             |          |
|    actor_loss      | 73.3     |
|    critic_loss     | 1.33     |
|    ent_coef        | 0.0643   |
|    ent_coef_loss   | -0.0662  |
|    learning_rate   | 0.001    |
|    n_updates       | 9499     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -146     |
| time/              |          |
|    total_timesteps | 10000    |
| train/             |          |
|    actor_loss      | 66       |
|    critic_loss     | 1.62     |
|    ent_coef        | 0.053    |
|    ent_coef_loss   | -0.0479  |
|    learning_rate   | 0.001    |
|    n_updates       | 9899     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -465     |
| time/              |          |
|    episodes        | 52       |
|    fps             | 208      |
|    time_elapsed    | 49       |
|    total_timesteps | 10400    |
| train/             |          |
|    actor_loss      | 59.7     |
|    critic_loss     | 1.38     |
|    ent_coef        | 0.0622   |
|    ent_coef_loss   | -0.349   |
|    learning_rate   | 0.001    |
|    n_updates       | 10299    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -159     |
| time/              |          |
|    total_timesteps | 11000    |
| train/             |          |
|    actor_loss      | 54.9     |
|    critic_loss     | 1.09     |
|    ent_coef        | 0.0897   |
|    ent_coef_loss   | -0.415   |
|    learning_rate   | 0.001    |
|    n_updates       | 10899    |
---------------------------------


<stable_baselines3.sac.sac.SAC at 0x7f812c206220>