torch.nn.Module and torch.nn.Parameter

In [6]:
import torch

class TinyModel(torch.nn.Module):

    def __init__(self):
        super(TinyModel, self).__init__()

        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x

tinymodel = TinyModel()

print('The model:')
print(tinymodel)

print('\n\nJust one layer:')
print(tinymodel.linear2)

print('\n\n Model params:')
for param in tinymodel.parameters():
    print(param)

print('\n\nLayer params:')
for param in tinymodel.linear2.parameters():
    print(param)

The model:
TinyModel(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)


Just one layer:
Linear(in_features=200, out_features=10, bias=True)


 Model params:
Parameter containing:
tensor([[ 0.0775, -0.0466, -0.0823,  ...,  0.0309,  0.0463,  0.0428],
        [-0.0781, -0.0686,  0.0975,  ..., -0.0794,  0.0393,  0.0911],
        [ 0.0742,  0.0978,  0.0943,  ..., -0.0161, -0.0011, -0.0035],
        ...,
        [ 0.0099, -0.0698, -0.0138,  ...,  0.0255, -0.0374,  0.0363],
        [-0.0115,  0.0733,  0.0282,  ..., -0.0817,  0.0057,  0.0434],
        [-0.0293, -0.0277,  0.0653,  ...,  0.0476,  0.0951,  0.0572]],
       requires_grad=True)
Parameter containing:
tensor([-0.0904, -0.0416,  0.0679,  0.0950, -0.0284,  0.0648, -0.0184, -0.0779,
         0.0393, -0.0329,  0.0526,  0.0494, -0.0908, -0.0234,  0.0896, -0.0008,
        -0.0685, -0.0806,  0.0136, -0.0

Common Layer Types

In [8]:
lin = torch.nn.Linear(3, 2)
x = torch.rand(1, 3)
print('Input:')
print(x)

print('\n\nWeight and Bias parameters:')
for param in lin.parameters():
    print(param)

y = lin(x)
print('\n\nOutput:')
print(y)

Input:
tensor([[0.7104, 0.3575, 0.1890]])


Weight and Bias parameters:
Parameter containing:
tensor([[-0.3872,  0.4567, -0.2395],
        [ 0.2752,  0.2803,  0.5092]], requires_grad=True)
Parameter containing:
tensor([ 0.0759, -0.1720], requires_grad=True)


Output:
tensor([[-0.0812,  0.2199]], grad_fn=<AddmmBackward0>)


Convolutional Layers

In [10]:
import torch.functional as F


class LeNet(torch.nn.Module):

    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel (black & white), 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = torch.nn.Conv2d(1, 6, 5)
        self.conv2 = torch.nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = torch.nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [11]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = torch.nn.Linear(hidden_dim, target_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

Other Layers and Functions

In [12]:
my_tensor = torch.rand(1, 6, 6)
print(my_tensor)

maxpool_layer = torch.nn.MaxPool2d(3)
print(maxpool_layer(my_tensor))

tensor([[[0.4545, 0.4162, 0.0879, 0.7534, 0.7398, 0.3373],
         [0.5692, 0.1075, 0.3703, 0.2151, 0.5996, 0.6495],
         [0.2021, 0.5432, 0.9811, 0.2394, 0.3459, 0.3853],
         [0.8899, 0.5216, 0.1839, 0.9940, 0.1797, 0.4341],
         [0.5699, 0.1267, 0.6560, 0.3339, 0.8537, 0.1195],
         [0.4487, 0.6072, 0.3675, 0.7267, 0.7438, 0.0321]]])
tensor([[[0.9811, 0.7534],
         [0.8899, 0.9940]]])


In [16]:
my_tensor = torch.rand(1, 4, 4) * 20 + 5
print(my_tensor)

print(my_tensor.mean())

norm_layer = torch.nn.BatchNorm1d(4)
normed_tensor = norm_layer(my_tensor)
print(norm_layer)
print(normed_tensor.mean())

tensor([[[11.8127, 20.7439, 11.2504, 17.9157],
         [16.5257, 11.3992,  7.6493, 15.3131],
         [ 7.6376, 10.0736,  8.2986,  9.7116],
         [24.6445,  7.0725, 15.7648, 24.9847]]])
tensor(13.7999)
BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
tensor(-1.7881e-07, grad_fn=<MeanBackward0>)


In [18]:
my_tensor = torch.rand(1, 4, 4)
dropout = torch.nn.Dropout(p=0.4)
print(dropout(my_tensor))
print(dropout(my_tensor))

tensor([[[0.9174, 0.0000, 0.4743, 0.1876],
         [0.9854, 0.0000, 1.3405, 0.0000],
         [0.0000, 1.4480, 0.0000, 0.4914],
         [0.1861, 0.0000, 0.0000, 0.0000]]])
tensor([[[0.9174, 1.7359, 0.0000, 0.1876],
         [0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.4914],
         [0.1861, 0.0000, 0.8889, 0.0000]]])
