In [None]:
import os

# Expose only GPU 0 to CUDA for this kernel.
# NOTE(review): this presumably has to run before torch initialises CUDA
# (torch is imported in a later cell) — confirm if cells are ever reordered.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
import pickle

def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code; only use on trusted files.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Train / validation / test splits. Each is a sized sequence; downstream code
# unpacks every element as a `(sent, lbl)` pair.
tr = _load_pickle('Dataset/tr.pkl')
vl = _load_pickle('Dataset/vl.pkl')
ts = _load_pickle('Dataset/ts.pkl')

print(len(tr), len(vl), len(ts))

In [None]:
import torch as T
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import numpy as np

import spacy

# Load spaCy's large English pipeline; DS_Imdb uses it to obtain the padding vector.
# NOTE(review): downstream code assumes its token vectors are 300-dimensional —
# confirm for the installed model version.
NLP = spacy.load('en_core_web_lg')

In [None]:
# Fixed number of time steps every sample is padded (or truncated) to.
# NOTE(review): presumably the longest sentence in the dataset — confirm.
LEN_MAX = 404

class DS_Imdb(Dataset):
    """Dataset yielding `(length, padded embedding matrix, label)` triples.

    Each element of `dat` is unpacked as a `(sent, lbl)` pair, where `sent`
    supports `len()` and integer indexing and every `sent[i]` exposes a
    `.vector` embedding (presumably a spaCy Doc — confirm against the pickles).
    """

    def __init__(self, dat):
        self.dat = dat
        # Padding vector: the embedding spaCy assigns to the ideographic full stop.
        self.END = NLP('。')[0].vector

    def __len__(self):
        return len(self.dat)

    def __getitem__(self, idx):
        """Return `(l, inp, lbl)` for sample `idx`.

        Returns:
            l:   number of real (non-padding) rows in `inp`, at most `LEN_MAX`.
            inp: array of shape `(LEN_MAX, dim)`; rows `[0, l)` hold the token
                 vectors, rows `[l, LEN_MAX)` hold the padding vector.
            lbl: the label stored alongside the sentence, returned unchanged.
        """
        sent, lbl = self.dat[idx]
        # Clamp so an over-long sentence is truncated instead of raising IndexError.
        l = min(len(sent), LEN_MAX)

        # Take the embedding width from the padding vector rather than hard-coding it.
        dim = self.END.shape[0]
        inp = np.zeros((LEN_MAX, dim))
        for i in range(l):
            inp[i] = sent[i].vector
        # Fill every remaining row with the padding vector in one broadcasted assignment.
        inp[l:] = self.END

        return l, inp, lbl

# Wrap each split in the padding dataset; only the training loader is shuffled.
ld_tr = DataLoader(dataset=DS_Imdb(tr), shuffle=True, batch_size=32)
ld_vl = DataLoader(dataset=DS_Imdb(vl), batch_size=64)
ld_ts = DataLoader(dataset=DS_Imdb(ts), batch_size=64)

# Sanity check: print the tensor shapes of the first training batch only.
# `l` and `inp` stay bound afterwards and are reused by the model smoke test.
for batch in ld_tr:
    l, inp, ans = batch
    print(l.shape, inp.shape, ans.shape)
    break

In [None]:
class Model_LSTM(nn.Module):
    """Two-layer LSTM sentence classifier that outputs 2-class logits.

    Args:
        hid_size: hidden size of the LSTM (and input width of the classifier head).
        mode: stored on the instance; not otherwise used by this class.
    """

    def __init__(self, hid_size=256, mode='vanilla'):
        super().__init__()

        self.hid_size = hid_size
        self.mode = mode

        # Submodule names/indices are kept as-is so existing state dicts still load.
        self.lstm = nn.LSTM(300, self.hid_size, num_layers=2,
                            batch_first=True, dropout=0.2)
        self.fc = nn.Sequential(nn.Linear(self.hid_size, 100), nn.ReLU(), nn.Dropout(0.25),
                                nn.Linear(100, 2))

    def forward(self, l, inp):
        """Classify a batch of padded sequences.

        Args:
            l:   per-sample count of valid time steps (CPU integer tensor / array-like).
            inp: float tensor of shape `(batch, seq_len, 300)`, padded along `seq_len`.

        Returns:
            Tensor of shape `(batch, 2)` with the class logits.
        """
        # pack_padded_sequence needs int64 lengths on the CPU. Clamping mirrors the
        # slice `inp[:, :l]`, which silently caps a length at the padded size.
        lengths = T.as_tensor(l, dtype=T.int64).cpu().clamp(max=inp.shape[1])

        # One batched LSTM call over the packed batch replaces a per-sample Python
        # loop; padding steps are skipped, so each sequence still ends at its own length.
        packed = nn.utils.rnn.pack_padded_sequence(inp, lengths, batch_first=True,
                                                   enforce_sorted=False)
        _, (h_n, _) = self.lstm(packed)

        # h_n[-1] is the top layer's hidden state at each sequence's last valid step,
        # i.e. the same features as `out[:, -1]` on the individually sliced sequence.
        return self.fc(h_n[-1])

# Build the classifier with its default hyper-parameters and move it to the GPU.
model = Model_LSTM()
model = model.cuda()

# Smoke test on the batch left over from the loader shape check (`l`, `inp`).
out = model(l, inp.to(T.float32).cuda())
print(out.shape)

In [None]:
# Cross-entropy on the model's two output logits.
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.cuda()
optim = T.optim.Adam(params=model.parameters(), lr=8e-4)

# Upper bound on the number of epochs; the early-stopping check may end training sooner.
EPOCHS = 200

In [None]:
class EarlyStop:
    """Patience counter driven by a validation accuracy.

    `add` reports True once `threshold` consecutive calls have each supplied an
    accuracy strictly below the best value seen so far. A value that matches or
    beats the best resets the counter and becomes the new best.
    """

    def __init__(self, threshold=10):
        self.threshold = threshold

        self.acc_max = 0  # best accuracy recorded so far
        self.cnt = 0      # consecutive calls strictly below `acc_max`

    def add(self, acc):
        """Record `acc` and return True when training should stop, else False."""
        is_worse = acc < self.acc_max
        if not is_worse:
            # Ties count as "not worse": reset the patience and store the value.
            self.acc_max = acc
            self.cnt = 0
        else:
            self.cnt += 1

        return self.cnt >= self.threshold

ES = EarlyStop()

In [None]:
from tqdm import tqdm_notebook as tqdm

In [None]:
# Make sure the checkpoint directory exists before the first save.
os.makedirs('Model', exist_ok=True)

# Best validation accuracy seen so far; decides when the "best" checkpoint is refreshed.
acc_best = float('-inf')

for e in tqdm(range(EPOCHS)):
    # ---- training pass -------------------------------------------------
    ls_ep = 0

    model.train()
    with tqdm(ld_tr) as TQ:
        for l, inp, ans in TQ:
            out = model(l, inp.float().cuda())
            ls_bh = loss_func(out, ans.cuda())

            optim.zero_grad()
            ls_bh.backward()
            optim.step()

            # .item() yields a plain Python float, detached from the graph.
            ls_bh = ls_bh.item()
            TQ.set_postfix(ls_bh='%.3f'%(ls_bh))
            ls_ep += ls_bh

        # Mean of the per-batch losses for this epoch.
        ls_ep /= len(TQ)
        print('Ep %d: %.4f' % (e+1, ls_ep))

        # Per-epoch checkpoint.
        T.save(model.state_dict(), 'Model/lstm-vanilla_%d.pt' % (e+1))

    # ---- validation pass -----------------------------------------------
    n_hit, n_all = 0, 0

    model.eval()
    # no_grad: validation needs no autograd graph, which saves memory and time.
    with T.no_grad(), tqdm(ld_vl) as TQ:
        for l, inp, ans in TQ:
            out = model(l, inp.float().cuda())

            out = out.cpu().numpy()
            out = np.argmax(out, axis=1)
            ans = ans.numpy()

            acc_bh = np.average(out==ans)
            TQ.set_postfix(acc_bh='%.3f'%(acc_bh))

            # Count samples, not batches, so a smaller last batch is not over-weighted.
            n_hit += int(np.sum(out==ans))
            n_all += len(ans)

        acc_ep = n_hit / max(n_all, 1)
        print('%.4f'%(acc_ep))

    # Keep the best-so-far weights under the un-numbered path that the
    # checkpoint-loading cell reads. NOTE: this overwrites any existing file there.
    if acc_ep > acc_best:
        acc_best = acc_ep
        T.save(model.state_dict(), 'Model/lstm-vanilla.pt')

    if ES.add(acc_ep):
        print('Finish training in ep=%d'%(e+1))

        break

In [None]:
# Restore weights from the un-numbered checkpoint before the test evaluation.
# NOTE(review): confirm this file exists and holds the intended (e.g. best-validation)
# weights — the per-epoch checkpoints are written as 'Model/lstm-vanilla_<epoch>.pt'.
model.load_state_dict(T.load('Model/lstm-vanilla.pt'))

In [None]:
# Test-set accuracy, computed as correct predictions / total samples.
n_hit, n_all = 0, 0

model.eval()
# no_grad: evaluation needs no autograd graph, which saves memory and time.
with T.no_grad(), tqdm(ld_ts) as TQ:
    for l, inp, ans in TQ:
        out = model(l, inp.float().cuda())

        out = out.cpu().numpy()
        out = np.argmax(out, axis=1)
        ans = ans.numpy()

        acc_bh = np.average(out==ans)
        TQ.set_postfix(acc_bh='%.3f'%(acc_bh))

        # Count samples, not batches, so a smaller last batch is not over-weighted.
        n_hit += int(np.sum(out==ans))
        n_all += len(ans)

    acc_ep = n_hit / max(n_all, 1)
    print('%.4f'%(acc_ep))