-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnetwork.py
90 lines (76 loc) · 3.33 KB
/
network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
import torch.nn
import parameters as pm
import torch.nn as nn
import torch.nn.functional as F # 激励函数都在这
import torch.optim as optim
class PNet(nn.Module):
    """Fully-connected policy network.

    Maps a state vector of size ``pm.STATE_DIM[1]`` through
    ``pm.NUM_FCN_LAYERS`` ReLU-activated hidden layers of
    ``pm.NUM_NEURONS_PER_FCN`` units each, ending in a softmax
    distribution over ``pm.ACTION_DIM`` actions.
    """
    def __init__(self):
        super(PNet, self).__init__()
        # Input features: type, arrival, progress, resource.
        self.linears = nn.ModuleList()
        self.linears.append(nn.Linear(pm.STATE_DIM[1], pm.NUM_NEURONS_PER_FCN))
        self.linears.append(nn.ReLU())
        # Hidden layers 1 .. NUM_FCN_LAYERS-1 (the first layer is above).
        for _ in range(1, pm.NUM_FCN_LAYERS):
            self.linears.append(nn.Linear(pm.NUM_NEURONS_PER_FCN, pm.NUM_NEURONS_PER_FCN))
            self.linears.append(nn.ReLU())
        # Output head: one logit per action.
        self.linears.append(nn.Linear(pm.NUM_NEURONS_PER_FCN, pm.ACTION_DIM))
    def forward(self, x):
        """Return action probabilities for the state batch ``x``."""
        for layer in self.linears:
            x = layer(x)
        # Fix: F.softmax without an explicit dim is deprecated and selects an
        # implicit dimension; normalize over the last (action) dimension.
        return F.softmax(x, dim=-1)
class PolicyNetwork:
    """Bundles a PNet with its optimizer and (in SL mode) a loss criterion.

    Parameters
    ----------
    scope : identifier for this network instance.
    mode : "SL" enables the supervised-learning criterion selection.
    logger : logger object stored for use by callers.
    """
    def __init__(self, scope, mode, logger):
        self.scope = scope
        self.mode = mode
        self.logger = logger
        self.net = PNet()
        self.lr = pm.LEARNING_RATE
        if pm.OPTIMIZER == "Adam":
            # Fix: use the configured learning rate instead of a hard-coded
            # 0.0001 — the RMSProp branch already used self.lr.
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr,
                                        betas=(0.9, 0.999), eps=1e-08,
                                        weight_decay=0, amsgrad=False)
        elif pm.OPTIMIZER == "RMSProp":
            self.optimizer = optim.RMSprop(self.net.parameters(), lr=self.lr,
                                           alpha=0.99, eps=1e-08,
                                           weight_decay=0, momentum=0,
                                           centered=False)
        if self.mode == "SL":
            if pm.SL_LOSS_FUNCTION == "Mean_Square":
                self.criterion = nn.MSELoss()
            elif pm.SL_LOSS_FUNCTION == "Cross_Entropy":
                # NOTE(review): PNet.forward already applies softmax, but
                # CrossEntropyLoss expects raw logits — confirm this pairing
                # is intended before training with it.
                self.criterion = nn.CrossEntropyLoss()
            elif pm.SL_LOSS_FUNCTION == "Absolute_Difference":
                self.criterion = nn.L1Loss()
    def get_weights(self):
        """Return the network parameters as a list of tensors."""
        return list(self.net.parameters())
    def predict(self, inputs: torch.Tensor) -> torch.Tensor:
        """Forward pass; inputs are cast to float32 first."""
        return self.net(inputs.float())
    def get_sl_loss(self, inputs: torch.Tensor, label):
        """Return (predictions, SL loss) for a labelled batch.

        Fix: the original ran two identical forward passes; one suffices.
        """
        assert self.mode == "SL"
        output = self.net(inputs.float())
        return output, self.criterion(output, label.long())
    # Adjust the entropy regularization weight as training progresses.
    def anneal_entropy_weight(self, step):
        if pm.FIX_ENTROPY_WEIGHT:
            self.entropy_weight = pm.ENTROPY_WEIGHT
        else:
            # Sigmoid-shaped decay from MAX_ENTROPY_WEIGHT toward a 0.1 floor.
            self.entropy_weight = max(
                pm.MAX_ENTROPY_WEIGHT * 2 / (1 + np.exp(step / pm.ANNEALING_TEMPERATURE)), 0.1)
class VNet(nn.Module):
    """Value-network stub: a single 3-in / 6-out convolution with 5x5 kernels.

    Only one layer is defined; the rest of the architecture is not built yet.
    """
    def __init__(self):
        super(VNet, self).__init__()
        # Single convolutional layer placeholder.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
class ValueNetwork:
    """Bundles a value network with its optimizer.

    NOTE(review): this instantiates PNet, not VNet — the TODO below says the
    real value network is still to be designed, so existing behavior is kept;
    confirm the intended architecture before relying on it.
    """
    def __init__(self, scope, mode, logger):
        self.scope = scope
        self.mode = mode
        self.logger = logger
        self.net = PNet()
        self.lr = pm.LEARNING_RATE
        # todo: this is a demo network, needs to be carefully designed.
        if pm.OPTIMIZER == "Adam":
            # Fix: use the configured learning rate instead of a hard-coded
            # 0.0001, matching the RMSProp branch and PolicyNetwork.
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr,
                                        betas=(0.9, 0.999), eps=1e-08,
                                        weight_decay=0, amsgrad=False)
        elif pm.OPTIMIZER == "RMSProp":
            self.optimizer = optim.RMSprop(self.net.parameters(), lr=self.lr,
                                           alpha=0.99, eps=1e-08,
                                           weight_decay=0, momentum=0,
                                           centered=False)
        # Consistency fix: PolicyNetwork exposes `optimizer`; keep the old
        # `optimize` name as a backward-compatible alias for existing callers.
        if hasattr(self, "optimizer"):
            self.optimize = self.optimizer