In [1]:
import torch as t
from torch import nn
from torch.autograd import Variable as V

In [9]:
class Linear(nn.Module):
    """Hand-written fully connected layer computing ``y = x @ w + b``.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.

    Attributes:
        w: learnable weight of shape ``(in_features, out_features)``.
        b: learnable bias of shape ``(out_features,)``.

    Both parameters are initialised from a standard normal distribution.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Wrapping the tensors in nn.Parameter registers them with the module,
        # so they are returned by parameters() / named_parameters().
        self.w = nn.Parameter(t.randn(in_features, out_features))
        self.b = nn.Parameter(t.randn(out_features))

    def forward(self, x):
        """Apply the affine map to ``x``.

        Args:
            x: tensor whose last dimension equals ``in_features``. Any number
                of leading batch dimensions is accepted.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``out_features``.
        """
        # matmul generalises mm to inputs with extra leading dimensions, and
        # the bias is broadcast over them without an explicit expand_as.
        return t.matmul(x, self.w) + self.b


In [11]:
# Sanity check: push a random batch of 2 samples through a 4 -> 3 layer.
layer = Linear(4, 3)
# A plain tensor is passed directly; the deprecated Variable wrapper is not
# needed for autograd.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# a later cell displays it.
input = t.randn(2, 4)
output = layer(input)
output

tensor([[-0.2058, -1.6346, -1.3626],
        [-2.1448, -4.4786, -2.9667]], grad_fn=<AddBackward0>)

In [13]:
# Display the random batch that was fed to the layer.
input

tensor([[-0.5647, -0.1169,  1.7913,  0.8200],
        [-1.2142, -0.4016, -1.2982,  0.6233]])

In [21]:
# Inspect the weight parameter together with its shape.
layer.w, layer.w.shape

(Parameter containing:
 tensor([[ 1.8562,  1.6121,  0.5901],
         [-2.1290,  1.0315, -0.6647],
         [ 0.4427,  0.5946,  0.4454],
         [-0.1427, -1.6970,  0.1725]], requires_grad=True),
 torch.Size([4, 3]))

In [23]:
# Inspect the bias parameter together with its shape.
layer.b, layer.b.shape

(Parameter containing:
 tensor([-0.0826, -0.2771, -2.0465], requires_grad=True),
 torch.Size([3]))

In [27]:
# Illustrate expand_as: the bias is expanded to the shape of `layer.w`,
# so every row of the result repeats the bias.
layer.b.expand_as(layer.w)

tensor([[-0.0826, -0.2771, -2.0465],
        [-0.0826, -0.2771, -2.0465],
        [-0.0826, -0.2771, -2.0465],
        [-0.0826, -0.2771, -2.0465]], grad_fn=<ExpandBackward0>)

In [29]:
# Walk over every parameter registered on the layer, printing name and value.
for param_name, param_value in layer.named_parameters():
    print(param_name, param_value)

w Parameter containing:
tensor([[ 1.8562,  1.6121,  0.5901],
        [-2.1290,  1.0315, -0.6647],
        [ 0.4427,  0.5946,  0.4454],
        [-0.1427, -1.6970,  0.1725]], requires_grad=True)
b Parameter containing:
tensor([-0.0826, -0.2771, -2.0465], requires_grad=True)


In [43]:
class Perceptron(nn.Module):
    """Two stacked custom ``Linear`` layers followed by a sigmoid.

    Args:
        in_features: width of the input.
        hidden_features: width of the intermediate representation.
        out_features: width of the output.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        # Sub-modules assigned as attributes are registered automatically, so
        # their parameters are listed under "layer1." / "layer2.".
        self.layer1 = Linear(in_features, hidden_features)
        self.layer2 = Linear(hidden_features, out_features)

    def forward(self, x):
        """Return ``sigmoid(layer2(layer1(x)))``."""
        # NOTE(review): there is no activation between the two layers, so they
        # compose into a single affine map before the sigmoid. Confirm this is
        # intended rather than applying the sigmoid to the hidden layer.
        hidden = self.layer1(x)
        return t.sigmoid(self.layer2(hidden))

In [47]:
# Build a 3 -> 4 -> 1 network and list the shape of each parameter; entries
# belonging to sub-modules carry the attribute name as a dotted prefix.
perceptron = Perceptron(3, 4, 1)
for param_name, param_tensor in perceptron.named_parameters():
    print(param_name, param_tensor.shape)

layer1.w torch.Size([3, 4])
layer1.b torch.Size([4])
layer2.w torch.Size([4, 1])
layer2.b torch.Size([1])


In [51]:
# inplace=True makes ReLU overwrite its argument instead of allocating a new
# tensor for the result.
relu = nn.ReLU(inplace=True)
# A plain tensor is used directly; the deprecated Variable wrapper is not needed.
input = t.randn(2, 3)
# Printed before the activation runs, so the negative entries are still visible.
print(input)
# After this call `input` itself holds the rectified values.
output = relu(input)
print(output)

tensor([[ 0.7565, -0.1800,  0.6593],
        [-1.2865,  0.4160, -0.6528]])
tensor([[0.7565, 0.0000, 0.6593],
        [0.0000, 0.4160, 0.0000]])


In [53]:
from collections import OrderedDict

# Three equivalent ways to assemble a Conv -> BatchNorm -> ReLU stack.

# 1) Start from an empty container and register each child under an explicit name.
net1 = nn.Sequential()
for child_name, child in (
    ('conv', nn.Conv2d(3, 3, 3)),
    ('batchnorm', nn.BatchNorm2d(3)),
    ('activation_layer', nn.ReLU()),
):
    net1.add_module(child_name, child)

# 2) Positional arguments: children are keyed by their position ("0", "1", ...).
net2 = nn.Sequential(*[nn.Conv2d(3, 3, 3), nn.BatchNorm2d(3), nn.ReLU()])

# 3) An OrderedDict supplies both the order and the child names.
net3 = nn.Sequential(
    OrderedDict(
        [
            ('conv1', nn.Conv2d(3, 3, 3)),
            ('bn1', nn.BatchNorm2d(3)),
            ('relu1', nn.ReLU()),
        ]
    )
)

# Show the resulting module trees side by side.
for label, net in (('net1', net1), ('net2', net2), ('net3', net3)):
    print(label, net)

net1 Sequential(
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation_layer): ReLU()
)
net2 Sequential(
  (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)
net3 Sequential(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
)


In [55]:
# Children registered under a name are reachable as attributes; children
# passed positionally are reachable by integer index.
net1.conv, net2[0], net3.conv1

(Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)),
 Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)),
 Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)))

In [57]:
from torch.nn import init
# Built-in fully connected layer with 3 input features and 4 output features.
linear = nn.Linear(3, 4)
linear

Linear(in_features=3, out_features=4, bias=True)

In [59]:
# Seed torch's random number generator so the random initialisation that
# follows can be reproduced.
t.manual_seed(1)

<torch._C.Generator at 0x1e2c40e3530>

In [61]:
# Weights as created by nn.Linear, before any re-initialisation.
linear.weight

Parameter containing:
tensor([[-0.1404,  0.1607,  0.5435],
        [ 0.0867, -0.0591, -0.4624],
        [ 0.2085,  0.3310,  0.1339],
        [-0.2273, -0.3168, -0.3913]], requires_grad=True)

In [63]:
# Re-initialise the weights in place with Xavier (Glorot) normal values.
# The trailing-underscore name is the supported in-place initialiser; the
# un-suffixed alias is deprecated and emits a warning.
init.xavier_normal_(linear.weight)

  init.xavier_normal(linear.weight)


Parameter containing:
tensor([[ 0.3535,  0.1427,  0.0330],
        [ 0.3321, -0.2416, -0.0888],
        [-0.8140,  0.2040, -0.5493],
        [-0.3010, -0.4769, -0.0311]], requires_grad=True)

In [67]:
import math

# Re-seed with the same value used before the library call so the manual
# initialisation draws the same random numbers.
t.manual_seed(1)

# Xavier normal std = sqrt(2 / (fan_in + fan_out)); derive the fan sizes from
# the layer instead of hard-coding their sum (3 + 4 = 7).
fan_in, fan_out = linear.in_features, linear.out_features
std = math.sqrt(2.0 / (fan_in + fan_out))
# Fill the weights in place through a detached view so the update is not
# recorded by autograd; the returned tensor is displayed as the cell output.
linear.weight.detach().normal_(0, std)

tensor([[ 0.3535,  0.1427,  0.0330],
        [ 0.3321, -0.2416, -0.0888],
        [-0.8140,  0.2040, -0.5493],
        [-0.3010, -0.4769, -0.0311]])