# NNLM (Neural Network Language Model)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
class NNLM(nn.Module):
    """Feed-forward neural network language model.

    Each token of a fixed-size context window is embedded, the embeddings
    are concatenated into one vector per sample, and that vector is passed
    through a single hidden ReLU layer to produce log-probabilities over
    the vocabulary.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output classes.
        embed_size: Dimension of each token embedding.
        hidden_size: Width of the hidden layer.
        context_size: Number of context tokens per sample.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, context_size):
        super().__init__()
        # Width of one sample after its context embeddings are concatenated.
        flat_features = context_size * embed_size

        self.embed = nn.Embedding(vocab_size, embed_size)
        self.fc1 = nn.Linear(flat_features, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities over the vocabulary for each sample.

        Args:
            x: Tensor of token indices; its first dimension is kept as the
                batch dimension and the remaining embedded values must
                flatten to ``context_size * embed_size`` per sample.

        Returns:
            Tensor of shape ``(x.size(0), vocab_size)`` holding log-softmax
            scores taken along dimension 1.
        """
        embedded = self.embed(x)
        # Collapse everything after the batch dimension into one vector so
        # the first linear layer sees the whole context at once.
        flattened = embedded.view(embedded.size(0), -1)
        hidden = self.relu(self.fc1(flattened))
        return self.log_softmax(self.fc2(hidden))

In [None]:
# Hyperparameters for the demo model.
VOCAB_SIZE = 5000       # number of distinct token ids (also the number of output classes)
EMBED_SIZE = 300        # dimension of each token embedding
HIDDEN_SIZE = 128       # width of the hidden layer
CONTEXT_SIZE = 2        # number of context tokens fed in per sample

model = NNLM(VOCAB_SIZE, EMBED_SIZE, HIDDEN_SIZE, CONTEXT_SIZE)
print(model)

# Expected printout, annotated. Kept as real comments: a bare triple-quoted
# string here would be an expression statement and would be echoed as the
# cell's output value.
#
# NNLM(
#   (embed): Embedding(5000, 300)                                   # 5000 token ids, each mapped to a 300-dim vector
#   (fc1): Linear(in_features=600, out_features=128, bias=True)     # input is 600 (2 context tokens * 300), hidden layer has 128 units
#   (relu): ReLU()
#   (fc2): Linear(in_features=128, out_features=5000, bias=True)    # one score per vocabulary entry, used to predict the next word
#   (log_softmax): LogSoftmax(dim=1)
# )

NNLM(
  (embed): Embedding(5000, 300)
  (fc1): Linear(in_features=600, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=5000, bias=True)
  (log_softmax): LogSoftmax(dim=1)
)


In [8]:
# Random toy batch: 8 samples, each a context of CONTEXT_SIZE token ids.
X = torch.randint(low=0, high=VOCAB_SIZE, size=(8, CONTEXT_SIZE))
# One target token id per sample: the word that follows each of the 8 contexts.
y = torch.randint(low=0, high=VOCAB_SIZE, size=(8,))

In [None]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss(reduction="mean")
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Run a single training step.
optimizer.zero_grad()           # clear any gradients left on the parameters
model.train()                   # put the model in training mode
output = model(X)
loss = criterion(output, y)
loss.backward()
optimizer.step()

# Loss for this step (computed from the forward pass above, before the update).
print(loss.item())

8.713176727294922
