In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.distributions import Categorical

In [2]:
class Actor(nn.Module):
    """Policy network that maps a state to a categorical action distribution.

    The model is a tanh MLP with two hidden layers followed by a softmax over
    ``n_actions`` outputs. All layers are created in double precision
    (``dtype=float``).

    Args:
        n_states: Number of input features per state.
        n_actions: Number of categories in the output distribution.
        hidden_dim: Width of each hidden layer.
    """

    def __init__(self, n_states, n_actions, hidden_dim):
        super(Actor, self).__init__()

        self.actor = nn.Sequential(
            nn.Linear(n_states, hidden_dim, dtype=float),
            nn.Tanh(),
            nn.Linear(hidden_dim, hidden_dim, dtype=float),
            nn.Tanh(),
            nn.Linear(hidden_dim, n_actions, dtype=float),
            nn.Softmax(dim=-1)
        )

    def forward(self, state):
        """Build the action distribution for ``state``.

        Args:
            state: A NumPy array, tensor, or nested sequence whose last
                dimension has ``n_states`` entries. Any floating dtype is
                accepted; it is converted to the dtype of the network weights.

        Returns:
            A ``(dist, entropy)`` tuple where ``dist`` is a
            ``torch.distributions.Categorical`` over the actions and
            ``entropy`` is ``dist.entropy()``.
        """
        # torch.from_numpy only accepts ndarrays and keeps their dtype, so a
        # float32 observation or a tensor input used to raise. as_tensor
        # handles both and aligns dtype/device with the first layer's weights.
        first_weight = self.actor[0].weight
        state = torch.as_tensor(
            state, dtype=first_weight.dtype, device=first_weight.device
        )
        probs = self.actor(state)
        dist = Categorical(probs)
        entropy = dist.entropy()
        return dist, entropy

In [8]:
# critic:value network
class Critic(nn.Module):
    """Value network that maps a state to a single scalar estimate.

    The model is a tanh MLP with two hidden layers and one linear output
    unit. All layers are created in double precision (``dtype=float``).

    Args:
        n_states: Number of input features per state.
        hidden_dim: Width of each hidden layer.
    """

    def __init__(self, n_states, hidden_dim):
        super(Critic, self).__init__()
        self.critic = nn.Sequential(
            nn.Linear(n_states, hidden_dim, dtype=float),
            nn.Tanh(),
            nn.Linear(hidden_dim, hidden_dim, dtype=float),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1, dtype=float)
        )

    def forward(self, state):
        """Evaluate the network on ``state``.

        Args:
            state: A NumPy array, tensor, or nested sequence whose last
                dimension has ``n_states`` entries. Any floating dtype is
                accepted; it is converted to the dtype of the network weights.

        Returns:
            A tensor whose last dimension has size 1.
        """
        # torch.from_numpy only accepts ndarrays and keeps their dtype, so a
        # float32 observation or a tensor input used to raise. as_tensor
        # handles both and aligns dtype/device with the first layer's weights.
        first_weight = self.critic[0].weight
        state = torch.as_tensor(
            state, dtype=first_weight.dtype, device=first_weight.device
        )
        value = self.critic(state)
        return value

In [3]:
import network_sim
import gym

env = gym.make('PccNs-v0')

n_states = env.observation_space.shape[0]
# NOTE(review): shape[0] is the length of the action vector, not a count of
# discrete choices, yet it is used as the number of Categorical categories in
# Actor. Confirm this is intended.
n_actions = env.action_space.shape[0]

policy = Actor(n_states, n_actions, 32)

# Roll out a single episode with the untrained policy, printing each reward.
state = env.reset()
done = False
# No gradients are needed while only sampling actions, so skip graph building.
with torch.no_grad():
    while not done:
        # Call the module itself (not .forward) so nn.Module hooks are honoured.
        dist, ent = policy(state)
        act = dist.sample().item()
        state, r, done, _ = env.step([act])
        print(r)

History length: 10
Features: ['sent latency inflation', 'latency ratio', 'send ratio']
Getting min obs for ['sent latency inflation', 'latency ratio', 'send ratio']
Reward: 0.00, Ewma Reward: 0.00
0.3601326583995361
0.227905849899158
0.227905849899158
0.3368715646290142
0.3368715646290142
0.3368715646290126
0.3368715646290126
0.3368715646290156
0.2279058498991946
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.11894013516933706
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.33687156462905204
0.22790584989919724
0.33687156462904366
0.3368715646289761
0.3368715646289761
0.3368715646289761
0.3368715646289761
0.3368715646289761
0.3368715646289761
0.22790584989912152
0.11894013516926691
0.3368715646289761
0.3368715646289761
0.3368715646289761
0.22790584989912152
0.22790584989912152
0.11894013516926691
0.009974420439412456
0.22790584989912152
0.3368715646289761
0.3368715646289761
0.22790584989912152
0.22



In [9]:
# Smoke-test the value network on `state`, which holds the last observation
# assigned by the rollout loop.
critic = Critic(n_states, 32)
# Call the module itself (not .forward) so nn.Module hooks are honoured.
value = critic(state)

In [2]:
import numpy as np
import torch

# Prefer the GPU when one is present, but fall back to the CPU so this cell
# does not raise on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Five standard-normal samples, converted to a float32 tensor on `device`.
s_np = np.random.randn(5)
s = torch.as_tensor(s_np, dtype=torch.float32, device=device)

In [19]:
import torch
class net(torch.nn.Module):
    """Minimal model: one linear layer taking 10 features to 1 output."""

    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Return the linear layer applied to ``x``."""
        out = self.fc(x)
        return out
        
# Three independent copies of the model, each with its own Adam optimizer.
a, b, c = net(), net(), net()
ao, bo, co = (torch.optim.Adam(model.parameters()) for model in (a, b, c))

# One shared pair of random inputs; each model's loss compares its own
# outputs on the two rows.
rand = torch.randn(2, 10)
al, bl, cl = (
    torch.nn.functional.mse_loss(model(rand[0]), model(rand[1]))
    for model in (a, b, c)
)

# Take a single optimisation step per model, in a, b, c order.
for optimizer, loss in ((ao, al), (bo, bl), (co, cl)):
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()