In [0]:
# Implementation of a multilayer perceptron (MLP)
import torch 
import torch.nn as nn
import torch.nn.functional as F


In [0]:
class MultilayerPerceptron(nn.Module):
    """A two-layer perceptron: Linear -> ReLU -> Linear, with optional softmax."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two fully connected layers.

        Args:
            input_dim (int): number of features in each input vector
            hidden_dim (int): number of units produced by the first Linear layer
            output_dim (int): number of units produced by the second Linear layer
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x_in, apply_softmax=False):
        """Run the input batch through both layers.

        Args:
            x_in (torch.Tensor): input batch; expected shape is (batch, input_dim)
            apply_softmax (bool): when True, normalize the scores with a softmax
                over dim=1. Leave it False when the result is fed to a
                cross-entropy loss.
        Returns:
            torch.Tensor: tensor of shape (batch, output_dim)
        """
        scores = self.fc2(F.relu(self.fc1(x_in)))

        # Guard clause: most callers want the raw scores.
        if not apply_softmax:
            return scores
        return F.softmax(scores, dim=1)


In [6]:
# Toy dimensions for a quick smoke test of the model.
batch_size = 2  # how many samples are fed through at once
input_dim = 3
hidden_dim = 100
output_dim = 4

# Build the network and print its layer summary.
mlp = MultilayerPerceptron(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
print(mlp)

MultilayerPerceptron(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)


In [0]:
def describe(x):
    """Print the type string, the shape, and the values of a tensor.

    Args:
        x (torch.Tensor): the tensor to summarize; it must provide
            ``type()`` and ``shape``.
    """
    type_name = x.type()
    print("Type: {}".format(type_name))
    dims = x.shape
    print("Shape/size: {}".format(dims))
    print("Values: \n{}".format(x))

In [8]:
# Inputs: a random batch with one row per sample
x_input = torch.rand((batch_size, input_dim))
describe(x_input)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.9306, 0.6729, 0.5635],
        [0.4303, 0.6484, 0.6657]])


In [9]:
# Forward pass with softmax disabled: the result holds the raw output of the
# second Linear layer.
y_output = mlp(x_input, apply_softmax=False)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[ 0.0692, -0.2544,  0.1580, -0.0177],
        [ 0.0321, -0.2138,  0.0640, -0.0506]], grad_fn=<AddmmBackward>)


In [10]:
# Same forward pass, now with the softmax over dim=1 enabled, so each row of
# the result is normalized to sum to 1.
y_output = mlp(x_input, apply_softmax=True)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[0.2679, 0.1938, 0.2927, 0.2456],
        [0.2677, 0.2094, 0.2764, 0.2465]], grad_fn=<SoftmaxBackward>)


In [11]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron with dropout between the hidden and output layers."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """
        Args:
            input_dim (int): the size of the input vectors
            hidden_dim (int): the output size of the first Linear layer
            output_dim (int): the output size of the second Linear layer
        """
        super(MultilayerPerceptron, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x_in, apply_softmax=False):
        """The forward pass of the MLP

        Dropout (p=0.5) is applied to the hidden activations only while the
        module is in training mode; after ``model.eval()`` the forward pass
        is deterministic.

        Args:
            x_in (torch.Tensor): an input data tensor.
                x_in.shape should be (batch, input_dim)
            apply_softmax (bool): a flag for the softmax activation
                should be false if used with the Cross Entropy losses
        Returns:
            the resulting tensor. tensor.shape should be (batch, output_dim)
        """
        intermediate = F.relu(self.fc1(x_in))
        # F.dropout defaults to training=True, so the module's own mode must be
        # passed explicitly or dropout would stay active during evaluation.
        intermediate = F.dropout(intermediate, p=0.5, training=self.training)
        output = self.fc2(intermediate)

        if apply_softmax:
            output = F.softmax(output, dim=1)
        return output

batch_size = 2  # how many samples are fed through at once
input_dim = 3
hidden_dim = 100
output_dim = 4

# Build a fresh model from the class defined above and print its layer summary.
mlp = MultilayerPerceptron(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
print(mlp)

# Forward pass on the existing input batch, without softmax.
y_output = mlp(x_input, apply_softmax=False)
describe(y_output)

MultilayerPerceptron(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)
Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[-0.0432, -0.4461, -0.1067,  0.0958],
        [-0.3605,  0.0295, -0.5094, -0.1962]], grad_fn=<AddmmBackward>)


In [12]:
batch_size = 2
one_hot_size = 10
sequence_width = 7

# Random stand-in batch laid out as (batch, channels, width).
data = torch.randn((batch_size, one_hot_size, sequence_width))

# A width-3 kernel without padding shortens the sequence from 7 to 5 positions.
conv1 = nn.Conv1d(one_hot_size, 16, kernel_size=3)
intermediate1 = conv1(data)

print(data.shape)
print(intermediate1.shape)

torch.Size([2, 10, 7])
torch.Size([2, 16, 5])


In [13]:
# Two more width-3 convolutions; each one shortens the sequence by 2 positions.
conv2 = nn.Conv1d(16, 32, kernel_size=3)
conv3 = nn.Conv1d(32, 64, kernel_size=3)

intermediate2 = conv2(intermediate1)
intermediate3 = conv3(intermediate2)

# One size per line, in the order the layers were applied.
print(intermediate2.size(), intermediate3.size(), sep="\n")


torch.Size([2, 32, 3])
torch.Size([2, 64, 1])


In [14]:
# Method 1 of reducing to feature vectors: drop the trailing length-1
# sequence dimension. Naming the dimension matters: a bare squeeze() would
# also remove the batch dimension whenever the batch holds a single sample.
y_output = intermediate3.squeeze(dim=2)
print(y_output.size())

torch.Size([2, 64])


In [15]:
# Average over the batch (dim 0), then over the sequence positions, and add up
# the resulting per-channel means into a single scalar.
torch.sum(intermediate2.mean(dim=0).mean(dim=1))

tensor(1.0443, grad_fn=<SumBackward0>)

In [16]:
# Method 2 of reducing to feature vectors
print(intermediate1.view(batch_size, -1).size())

# Method 3 of reducing to feature vectors
print(torch.mean(intermediate1, dim=2).size())
# print(torch.max(intermediate1, dim=2).size())
# print(torch.sum(intermediate1, dim=2).size())

torch.Size([2, 80])
torch.Size([2, 16])


In [17]:
# Only the batch norm usage is shown here; the full model is not reproduced.

conv1 = nn.Conv1d(one_hot_size, 16, kernel_size=3)
conv2 = nn.Conv1d(16, 32, kernel_size=3)
conv3 = nn.Conv1d(32, 64, kernel_size=3)

# One batch norm layer per convolution output it normalizes.
conv1_bn = nn.BatchNorm1d(16)
conv2_bn = nn.BatchNorm1d(32)

# Pattern for the first two layers: convolution -> ReLU -> batch norm.
intermediate1 = conv1_bn(F.relu(conv1(data)))
intermediate2 = conv2_bn(F.relu(conv2(intermediate1)))
intermediate3 = conv3(intermediate2)

print(
    intermediate1.size(),
    intermediate2.size(),
    intermediate3.size(),
    sep="\n",
)


torch.Size([2, 16, 5])
torch.Size([2, 32, 3])
torch.Size([2, 64, 1])


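BatchNorm computes its statistics over the batch and sequence dimensions. In other words, the input to each `BatchNorm1d` is a tensor of size (B, C, L), where B = batch, C = channels, and L = length, and the statistics are computed separately for each channel. Each per-channel (B, L) slice of the output should therefore have zero mean, as the next cell checks. This reduces covariate shift.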

In [18]:
# Per-channel mean over the batch and sequence dimensions of the
# batch-normalized activations; every entry should be close to zero.
torch.mean(intermediate2, dim=(0, 2))

tensor([ 1.8626e-09,  1.9868e-08,  1.9868e-08,  9.9341e-09, -2.9802e-08,
         0.0000e+00, -4.9671e-09, -1.9868e-08,  0.0000e+00,  3.1044e-10,
        -1.9868e-08, -2.4835e-09, -4.9671e-09, -2.9802e-08,  0.0000e+00,
         0.0000e+00,  4.9671e-08, -9.9341e-09,  6.2088e-10,  1.3039e-08,
         9.9341e-09, -1.4901e-08,  9.9341e-09,  0.0000e+00, -2.2352e-08,
        -1.2418e-09, -3.9736e-08,  1.2418e-09, -2.4835e-08,  0.0000e+00,
        -4.9671e-09,  0.0000e+00], grad_fn=<MeanBackward2>)

Next, we cover convolutions. Below are code examples that instantiate convolutions with various hyperparameter settings.

In [19]:
# One sample with 2 input channels on a 3x3 grid.
x = torch.randn((1, 2, 3, 3))
describe(x)

# 2 input channels -> 1 output channel with a 2x2 kernel.
conv1 = nn.Conv2d(2, 1, kernel_size=2)
describe(conv1.weight)
describe(conv1(x))

Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 3, 3])
Values: 
tensor([[[[-0.8042, -0.9000,  0.2477],
          [ 0.5261, -0.3568, -0.1614],
          [-0.0298,  1.6860,  0.0725]],

         [[-2.1104, -1.1963,  1.0102],
          [-0.9370, -0.8230,  0.8714],
          [ 0.8653, -0.0199,  1.8770]]]])
Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2, 2])
Values: 
Parameter containing:
tensor([[[[ 0.1364, -0.3066],
          [-0.2983,  0.2009]],

         [[ 0.1763,  0.2498],
          [-0.3288, -0.2711]]]], requires_grad=True)
Type: torch.FloatTensor
Shape/size: torch.Size([1, 1, 2, 2])
Values: 
tensor([[[[-0.2970, -0.1439],
          [-0.2160, -1.0124]]]], grad_fn=<MkldnnConvolutionBackward>)


In [20]:
# One sample with a single input channel on a 3x3 grid.
x = torch.randn((1, 1, 3, 3))
describe(x)

# 1 input channel -> 2 output channels with a 2x2 kernel.
conv1 = nn.Conv2d(1, 2, kernel_size=2)
describe(conv1.weight)
describe(conv1(x))

Type: torch.FloatTensor
Shape/size: torch.Size([1, 1, 3, 3])
Values: 
tensor([[[[-0.8242, -0.2874, -0.5634],
          [-1.0233,  0.8660, -0.6022],
          [ 0.6489, -1.1430, -1.0285]]]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 1, 2, 2])
Values: 
Parameter containing:
tensor([[[[-0.4151,  0.4286],
          [-0.4139,  0.3740]]],


        [[[ 0.0969,  0.3149],
          [ 0.3956,  0.4101]]]], requires_grad=True)
Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2, 2])
Values: 
tensor([[[[ 1.0425, -0.6297],
          [ 0.1759, -0.4529]],

         [[ 0.1066,  0.2170],
          [ 0.2881, -0.6530]]]], grad_fn=<MkldnnConvolutionBackward>)
