In [1]:
from typing import List, Tuple

import pandas as pd
import torch
import numpy as np

from module import Word2IdConverter, sentence2words

In [2]:
torch.cuda.is_available()

True

In [3]:
train_df = pd.read_csv('data/train.txt', sep='\t')
train_df.shape

(10672, 8)

In [4]:
converter = Word2IdConverter('data/mapping.csv')
n_words = converter.get_n_words()

In [5]:
results = map(sentence2words, train_df.title)
results = map(converter.word2id, results)
X = list(map(lambda x: torch.Tensor(x).long(), results))
len(X)

10672

In [6]:
y = torch.Tensor(train_df.category.map({'b': 0, 't': 1, 'e': 2, 'm': 3}).to_list()).long()

In [7]:
class RNN(torch.nn.Module):
    def __init__(self, vocab_size: int, input_size: int, hidden_size: int, output_size: int):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = torch.nn.Embedding(vocab_size, input_size)
        self.rnn = torch.nn.RNN(input_size, hidden_size)
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor, hidden: torch.Tensor) -> torch.Tensor:
        x = self.embedding(x)
        x, _ = self.rnn(x, hidden)
        x = self.linear(x[:, -1])
        return torch.log_softmax(x, dim=1)

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [8]:
dw = 300
dh = 50
n_class = 4
rnn = RNN(vocab_size=n_words, input_size=dw, hidden_size=dh, output_size=n_class).cuda()

In [9]:
n_train_size = 10000
X_train = X[:n_train_size]
y_train = y[:n_train_size]
#X_train = X
#y_train = y

In [10]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.01)

In [13]:
n_epochs = 10
batch_size = 256
for epoch in range(1, n_epochs+1):
    optimizer.step()
    for i in range(0, len(X_train), batch_size):
        X_pad = torch.nn.utils.rnn.pad_sequence(X_train[i:i+batch_size], batch_first=True).cuda()
        h_0 = torch.zeros(1*X_pad.shape[1]*dh).reshape(1, X_pad.shape[1], dh).cuda()

        y_pred = rnn(X_pad, h_0)
        loss = criterion(y_pred, y_train[i:i+batch_size].cuda())
        loss.backward()
        optimizer.step()
    print(f'epoch {epoch}: {loss.item()}')
print('DONE')

epoch 1: 1.3933919668197632
epoch 2: 1.4459608793258667
epoch 3: 1.3067868947982788
epoch 4: 1.2725591659545898
epoch 5: 1.3662787675857544
epoch 6: 1.278447151184082
epoch 7: 1.2638437747955322
epoch 8: 1.327663779258728
epoch 9: 1.3854800462722778
epoch 10: 1.2451664209365845
DONE


In [14]:
y_pred.argmax(dim=1)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0], device='cuda:0')