<a href="https://colab.research.google.com/github/ysj9909/NLP_practice/blob/main/NPLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Neural Probabilistic Language Model 코드 구현 연습!!**

* paper link: https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf

* reference link : https://github.com/graykode/nlp-tutorial/tree/master/1-1.NNLM

* Dataset : text9 corpus


In [None]:
import torch

# Load the corpus and split it on single spaces. The context manager closes
# the file even if reading fails, and the explicit encoding keeps the read
# independent of the platform's default locale.
with open('text9.txt', 'r', encoding='utf-8') as f:
  words = f.read().split(' ')

# Only the first 100 tokens are used for this exercise.
words = words[:100]

# word -> integer id, assigned in order of first appearance.
word2idx = {}


idx = 0
for word in words:
  if word not in word2idx:
    word2idx[word] = idx
    idx += 1

# Reverse mapping: integer id -> word.
idx2word = {index : word for word, index in word2idx.items()}

# Number of distinct tokens among the words kept above.
Vocabulary_size = len(word2idx)

def make_batch(words, n_gram):
  """Build (context, target) index pairs with a sliding window over `words`.

  For every position `p` from `n_gram` to `len(words) - 1`, the context is the
  ids of the `n_gram` words preceding `p` and the target is the id of the word
  at `p`. Ids are looked up in the module-level `word2idx` mapping.

  Args:
    words: sequence of tokens, all of which must be keys of `word2idx`.
    n_gram: number of preceding words used as context.

  Returns:
    A tuple `(input_batch, target_batch)`: a list of context id lists and a
    list of target ids of the same length. Both are empty when `words` has
    no more than `n_gram` elements.
  """
  positions = range(n_gram, len(words))

  input_batch = [
      [word2idx[token] for token in words[p - n_gram: p]]
      for p in positions
  ]
  target_batch = [word2idx[words[p]] for p in positions]

  return input_batch, target_batch

# Hyperparameters for the model and the training loop.
n_gram = 3             # number of context words per sample
hidden_dim = 50        # width of the tanh hidden layer
embed_dim = 50         # size of each word embedding
learning_rate = 0.001
num_epochs = 100

# Turn the whole (truncated) corpus into one batch of integer tensors:
# one row of n_gram context ids per sample, and one target id per sample.
contexts, targets = make_batch(words, n_gram)
input_batch = torch.tensor(contexts, dtype = torch.long)
target_batch = torch.tensor(targets, dtype = torch.long)



In [None]:
# Sanity check: expect (number of samples, n_gram).
print(input_batch.shape)

torch.Size([97, 3])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim



class NNLM(nn.Module):
  """Feed-forward neural language model over a fixed window of context words.

  The context word ids are embedded, the embeddings are concatenated, passed
  through a tanh hidden layer and projected to one score per vocabulary word.
  With `is_direct_connection=True` an additional linear map from the
  concatenated embeddings straight to the output scores is added.

  Args:
    vocab_size: number of distinct words (size of the output layer).
    embed_dim: dimensionality of each word embedding.
    hidden_dim: width of the hidden layer.
    n_gram: number of context words per sample.
    is_direct_connection: whether to add the direct embedding-to-output term.
  """

  def __init__(self, vocab_size, embed_dim, hidden_dim, n_gram, is_direct_connection = False):
    super().__init__()
    self.is_direct_connection = is_direct_connection
    self.n_gram = n_gram
    self.embed_dim = embed_dim

    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.fc_hidden = nn.Linear(n_gram * embed_dim, hidden_dim)
    self.fc_out = nn.Linear(hidden_dim, vocab_size)
    if is_direct_connection:
      self.W = nn.Linear(n_gram * embed_dim, vocab_size)
    # Register the output bias as a parameter once, here. Wrapping a plain
    # tensor in nn.Parameter inside forward() creates a fresh, unregistered
    # parameter on every call, so the optimizer never sees or updates it.
    # Note this bias is in addition to the biases of the Linear layers above.
    self.bias = nn.Parameter(torch.zeros(vocab_size))

  def forward(self, input):
    """Return unnormalized scores over the vocabulary for each sample.

    Args:
      input: integer tensor of word ids with `n_gram` ids per sample.

    Returns:
      Tensor of shape (batch_size, vocab_size) holding the raw scores
      (logits); no softmax is applied.
    """
    embedded = self.embedding(input) # (batch_size, n_gram, embed_dim)
    embedded = embedded.view(-1, self.n_gram * self.embed_dim) # (batch_size, n_gram * embed_dim)
    hidden_state = torch.tanh(self.fc_hidden(embedded))

    if self.is_direct_connection:
      return self.bias + self.W(embedded) + self.fc_out(hidden_state)
    return self.bias + self.fc_out(hidden_state)


# Build the model with the direct embedding-to-output connection enabled.
model = NNLM(Vocabulary_size, embed_dim, hidden_dim, n_gram, True)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)


# Full-batch training: every epoch is a single step over all samples.
for epoch in range(num_epochs):
  outputs = model(input_batch)
  loss = criterion(outputs, target_batch)

  # Clear gradients left over from the previous step before backpropagating.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  if (epoch + 1) % 10 == 0:
    print(f"Epoch[{epoch + 1} / {num_epochs}], Loss : {loss.item()}")

# Predict
# No gradients are needed for inference, so skip building the autograd graph
# instead of detaching afterwards through `.data`.
with torch.no_grad():
  predicted = model(input_batch).max(1, keepdim = True)[1]

# Test
# Show the first few predictions next to their targets (at most 5, fewer if
# the batch is smaller).
for i in range(min(5, len(predicted))):
  print("model's output : ", idx2word[int(predicted[i])],' -> ' ,"target word : ", idx2word[int(target_batch[i])])

Epoch[10 / 100], Loss : 2.9890730381011963
Epoch[20 / 100], Loss : 1.741727352142334
Epoch[30 / 100], Loss : 0.8934531211853027
Epoch[40 / 100], Loss : 0.44029298424720764
Epoch[50 / 100], Loss : 0.22925002872943878
Epoch[60 / 100], Loss : 0.13942982256412506
Epoch[70 / 100], Loss : 0.09908091276884079
Epoch[80 / 100], Loss : 0.07777344435453415
Epoch[90 / 100], Loss : 0.06488428264856339
Epoch[100 / 100], Loss : 0.05624343082308769
model's output :  a  ->  target word :  a
model's output :  term  ->  target word :  term
model's output :  of  ->  target word :  of
model's output :  abuse  ->  target word :  abuse
model's output :  first  ->  target word :  originated


In [None]:
import torch

# Integer indexing drops the indexed dimension; slicing keeps it (length 1).
a = torch.zeros(3, 5)
print(a[1].shape)
print(a[:1].shape)

torch.Size([5])
torch.Size([1, 5])


In [None]:
import torch

# An embedding lookup appends the embedding dimension to the index shape.
a = torch.arange(15).reshape(3, 5)
embedding = torch.nn.Embedding(20, 3)
print(a.shape)
b = embedding(a)
print(b.shape)

torch.Size([3, 5])
torch.Size([3, 5, 3])
