In [1]:
import torch
import torch.nn as nn

Fully connected layer with one unit. A sigmoid activation makes it a logistic regression (a binary linear classifier):

In [2]:
# Logistic regression: a single linear unit whose output is squashed by a sigmoid.
NUM_INPUTS = 10
logistic_regression = nn.Sequential(
    nn.Linear(in_features=NUM_INPUTS, out_features=1),  # one output unit (the logit)
    nn.Sigmoid(),  # squashes the logit into (0, 1)
)

Fully connected layer with one unit. No activation makes it a linear regression:

In [3]:
# Linear regression: a single linear unit with no activation on top.
NUM_INPUTS = 10
linear_regression = nn.Sequential(
    nn.Linear(in_features=NUM_INPUTS, out_features=1),  # raw, unbounded output
)

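Fully connected layer with many units. A softmax activation makes it a “softmax classifier”. The code below uses `LogSoftmax`, so the model outputs log-probabilities (the form expected by `nn.NLLLoss`):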

In [5]:
# Multi-class linear classifier: one linear layer followed by a log-softmax.
NUM_INPUTS = 10
NUM_OUTPUTS = 5
softmax_classifier = nn.Sequential(
    nn.Linear(in_features=NUM_INPUTS, out_features=NUM_OUTPUTS),  # one score per class
    nn.LogSoftmax(dim=1),  # normalizes across dim 1 (the class scores of each row)
)

Many fully connected layers with many units make a multi-layer perceptron (MLP):

In [7]:
# Multi-layer perceptron: two tanh hidden layers and a log-softmax output.
NUM_INPUTS = 100
HIDDEN_SIZES = 1024
NUM_OUTPUTS = 20

# Layers are listed in forward order and unpacked into the Sequential container.
_mlp_layers = [
    nn.Linear(in_features=NUM_INPUTS, out_features=HIDDEN_SIZES),    # input -> hidden
    nn.Tanh(),
    nn.Linear(in_features=HIDDEN_SIZES, out_features=HIDDEN_SIZES),  # hidden -> hidden
    nn.Tanh(),
    nn.Linear(in_features=HIDDEN_SIZES, out_features=NUM_OUTPUTS),   # hidden -> class scores
    nn.LogSoftmax(dim=1),
]
mlp = nn.Sequential(*_mlp_layers)

We usually represent discrete tokens as one-hot codes. Can we use a Linear layer to process them?

In [8]:
VOCAB_SIZE = 10000
HIDDEN_SIZE = 100
BATCH_SIZE = 10  # number of copies of the same code in the example batch
TOKEN_ID = 0     # index of the "hot" position in the one-hot code

# Linear layer mapping a one-hot code of length VOCAB_SIZE to a HIDDEN_SIZE projection.
emb_1 = nn.Linear(VOCAB_SIZE, HIDDEN_SIZE)

# Forward example with a [BATCH_SIZE, VOCAB_SIZE] tensor.
# The code length is derived from VOCAB_SIZE so the two cannot drift apart.
code = [0] * VOCAB_SIZE
code[TOKEN_ID] = 1  # one-hot code [1 0 0 0 ... 0]

# Repeat the same code BATCH_SIZE times. torch.tensor with an explicit dtype
# replaces the legacy torch.Tensor(...) constructor and keeps the floating
# dtype that nn.Linear's parameters use by default.
x = torch.tensor([code] * BATCH_SIZE, dtype=torch.get_default_dtype())
print("Input x tensor size: ", x.size())
y = emb_1(x)
# NOTE: y is the layer's output; the "Input y" label is kept byte-identical so
# the printed text matches the recorded notebook output.
print("Input y tensor size: ", y.size())

Input x tensor size:  torch.Size([10, 10000])
Input y tensor size:  torch.Size([10, 100])


In [9]:
VOCAB_SIZE = 10000
HIDDEN_SIZE = 100
# Lookup table holding one HIDDEN_SIZE-dimensional vector per vocabulary entry.
emb_2 = nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=HIDDEN_SIZE)

# The embedding layer takes integer token ids, not one-hot codes: 10 rows, each holding token id 0.
x = torch.zeros((10, 1), dtype=torch.long)
print('Input x tensor size: ', x.size())
# y holds the looked-up vectors (the layer's output); the printed label is kept as in the original.
y = emb_2(x)
print('Input y tensor size: ', y.size())

Input x tensor size:  torch.Size([10, 1])
Input y tensor size:  torch.Size([10, 1, 100])


In [10]:
def one_hot_embedding(labels, num_classes):
    '''Embed class labels as one-hot vectors.

    Creates an identity matrix of dimension ``num_classes`` and takes rows
    from it: row ``k`` of the identity matrix is the one-hot vector of
    class ``k``.

    Args:
      labels: (int or LongTensor) class label(s). A tensor sized [N,] selects
        one row per label; a plain int selects a single row.
      num_classes: (int) number of classes.

    Returns:
      (tensor) encoded labels, sized [N,#classes] for a tensor of N labels,
      or [#classes] for a single int label.
    '''
    # Build the identity matrix on the same device as the labels: a CPU tensor
    # cannot be indexed with an index tensor that lives on another device.
    # Non-tensor labels (e.g. a plain int) keep the original CPU behavior.
    device = labels.device if isinstance(labels, torch.Tensor) else 'cpu'
    identity = torch.eye(num_classes, device=device)  # [D,D]
    return identity[labels]  # [N,D]

# Encode class 2 out of 10 classes and show the resulting one-hot vector.
labels = 2
num_classes = 10
one_hot_vector = one_hot_embedding(labels=labels, num_classes=num_classes)
print(one_hot_vector)
# expected: tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])
