# Linear Layer

In [40]:
import torch

## Raw Linear Layer

$$\begin{gathered}
y=x\cdot{W}+b, \\
\text{where }x\in\mathbb{R}^{N\times{n}}\text{, }y\in\mathbb{R}^{N\times{m}}. \\
\\
\text{Thus, }W\in\mathbb{R}^{n\times{m}}\text{ and }b\in\mathbb{R}^m.
\end{gathered}$$

In [41]:
W = torch.FloatTensor([[1, 2],
                       [3, 4],
                       [5, 6]])
b = torch.FloatTensor([2, 2])

In [42]:
print(W)
print(b)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([2., 2.])


In [43]:
print(W.size())
print(b.size())

torch.Size([3, 2])
torch.Size([2])


In [44]:
def linear(x, W, b):
    y = torch.matmul(x, W) + b
    
    return y

In [45]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])

print(x)
print(x.size())

tensor([[1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.]])
torch.Size([4, 3])


In [46]:
y = linear(x, W, b)
print(y)

tensor([[11., 14.],
        [20., 26.],
        [29., 38.],
        [38., 50.]])


In [47]:
print(y.size())

torch.Size([4, 2])


In [48]:
# |x| = (N,m)  |W| =(n,m) = (x*W) = (N,M) 
# >> (4,3) * (3,2) = (4,2)

## nn.Module

In [49]:
import torch.nn as nn

In [50]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = torch.FloatTensor(input_dim, output_dim)
        self.b = torch.FloatTensor(output_dim)

    # You should override 'forward' method to implement detail.
    # The input arguments and outputs can be designed as you wish.
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim) (== |x|*W)
        #     = (batch_size, output_dim)
        
        return y

In [51]:
linear = MyLinear(3, 2)

y = linear(x)
print(y)

tensor([[ 60.,  78.],
        [120., 156.],
        [180., 234.],
        [240., 312.]])


In [52]:
print(y.size())

torch.Size([4, 2])


In [53]:
for p in linear.parameters():
    print(p)

# 잘못 코드를 작성한 경우 

You can see that there is no weight parameters to learn.
Above way can forward(or calculate) values, but it cannot be trained.

### Correct way: nn.Parameter

In [65]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        # 수정: nn.Parameter 를 wrapping 해줘야 함
        self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim))
        self.b = nn.Parameter(torch.FloatTensor(output_dim))
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

Reference: https://pytorch.org/docs/stable/nn.html#torch.nn.Parameter

A kind of Tensor that is to be considered a module parameter.

Parameters are Tensor subclasses, that have a very special property when used with Module s - when they’re assigned as Module attributes they are automatically added to the list of its parameters, and will appear e.g. in parameters() iterator. Assigning a Tensor doesn’t have such effect. This is because one might want to cache some temporary state, like last hidden state of the RNN, in the model. If there was no such class as Parameter, these temporaries would get registered too.

In [66]:
linear = MyLinear(3, 2)

y = linear(x)
print(y)

tensor([[-1.8891e+26,  1.0602e+01],
        [-1.8891e+26,  2.0602e+01],
        [-1.8891e+26,  3.0602e+01],
        [-1.8891e+26,  4.0602e+01]], grad_fn=<AddBackward0>)


In [67]:
print(y.size())

torch.Size([4, 2])


In [68]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[1.7059e-13, 2.0529e-42],
        [3.0000e+00, 4.0000e+00],
        [5.0000e+00, 6.0000e+00]], requires_grad=True)
Parameter containing:
tensor([-1.8891e+26,  6.0240e-01], requires_grad=True)


## nn.Linear

In [69]:
linear = nn.Linear(3, 2)

y = linear(x)

In [70]:
print(y.size())

torch.Size([4, 2])


In [71]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[-0.4493,  0.0253, -0.4206],
        [ 0.2720,  0.4730, -0.3786]], requires_grad=True)
Parameter containing:
tensor([-0.5471, -0.5520], requires_grad=True)


### nn.Module can contain other nn.Module's child classes.

In [61]:
class MyLinear(nn.Module):
    # nn.Module 상속 받음 
    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [62]:
linear = MyLinear(3, 2)

y = linear(x)

In [63]:
print(y.size())

torch.Size([4, 2])


In [64]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[ 0.2164, -0.4472,  0.2737],
        [-0.3399, -0.3661, -0.4001]], requires_grad=True)
Parameter containing:
tensor([-0.4102,  0.3529], requires_grad=True)
