In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
X = torch.randn(3, 4, device="cuda")

In [3]:
class Mysequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for arg in args:
            self._modules[arg] = arg
    def forward(self, x):
        for block in self._modules.values():
            x = block(x)
        return x

In [4]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(4, 2)
        self.out = nn.Linear(2, 2)
    def forward(self, x):
        return F.softmax(self.out(F.relu(self.hidden(x))), dim=1)

In [5]:
net = Mysequential(nn.Linear(4, 2), nn.ReLU(), nn.Linear(2, 2), nn.Softmax(dim=1))
if torch.cuda.is_available():
    net.to("cuda")
net(X)

OrderedDict([(Linear(in_features=4, out_features=2, bias=True),
              Linear(in_features=4, out_features=2, bias=True)),
             (ReLU(), ReLU()),
             (Linear(in_features=2, out_features=2, bias=True),
              Linear(in_features=2, out_features=2, bias=True)),
             (Softmax(dim=1), Softmax(dim=1))])

In [6]:
net1 = MLP()
torch.save(net1.state_dict(), "./data/mlp_paramters.pt")
net1.load_state_dict(torch.load("./data/mlp_paramters.pt"))
net1.eval(), net1.state_dict()

(MLP(
   (hidden): Linear(in_features=4, out_features=2, bias=True)
   (out): Linear(in_features=2, out_features=2, bias=True)
 ),
 OrderedDict([('hidden.weight',
               tensor([[ 0.1849,  0.3872, -0.2210,  0.0789],
                       [ 0.0913,  0.4707,  0.0143, -0.0799]])),
              ('hidden.bias', tensor([-0.2144, -0.0146])),
              ('out.weight',
               tensor([[-0.6844,  0.5484],
                       [-0.3093,  0.4149]])),
              ('out.bias', tensor([0.4019, 0.2954]))]))

In [7]:
list(net1.parameters())

[Parameter containing:
 tensor([[ 0.1849,  0.3872, -0.2210,  0.0789],
         [ 0.0913,  0.4707,  0.0143, -0.0799]], requires_grad=True),
 Parameter containing:
 tensor([-0.2144, -0.0146], requires_grad=True),
 Parameter containing:
 tensor([[-0.6844,  0.5484],
         [-0.3093,  0.4149]], requires_grad=True),
 Parameter containing:
 tensor([0.4019, 0.2954], requires_grad=True)]