# 80. ID番号への変換

In [247]:
import numpy as np
import re
from functools import reduce
from nltk.tokenize import word_tokenize
from collections import defaultdict
import json

In [248]:
def load_data(path):
    """Read a tab-separated dataset with one ``text<TAB>label`` record per line.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(X, Y)`` of two parallel lists: ``X`` holds the first
        tab-separated field of every record and ``Y`` the second.

    Raises:
        IndexError: If a non-blank line contains no tab separator.
    """
    X = list()
    Y = list()
    # Decode explicitly so the result does not depend on the platform locale.
    with open(path, mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line carries no record; indexing it would fail.
                continue
            splited_line = line.split('\t')
            X.append(splited_line[0])
            Y.append(splited_line[1])
    return X, Y

def save_file_json(path, data):
    """Write *data* to *path* as a single line of JSON followed by a newline."""
    with open(path, mode='w') as sink:
        # print() appends the trailing newline for us.
        print(json.dumps(data), file=sink)
        
def load_file_json(path):
    """Return the object decoded from the JSON document stored at *path*."""
    with open(path, mode='r') as source:
        return json.loads(source.read())

def chr2num(y):
    """Translate category letters into integer class ids.

    The mapping is ``b -> 0``, ``t -> 1``, ``e -> 2``, ``m -> 3``; any other
    label raises ``KeyError``.
    """
    label_ids = dict(zip('btem', range(4)))
    encoded = []
    for label in y:
        encoded.append(label_ids[label])
    return encoded

In [267]:
class PreprocessTools:
    """Tokenisation and vocabulary helpers that turn texts into id sequences.

    Words present in the vocabulary get ids starting at 1; every other word
    is mapped to id 0.
    """

    def __init__(self, vocab_path=None):
        """Start with an empty vocabulary, or load one from a JSON file.

        Args:
            vocab_path: Optional path of a JSON word -> id mapping. When
                omitted the vocabulary is empty and ``vocab_size`` is -1
                until ``make_word_transformar`` is called.
        """
        self.word_count = defaultdict(int)
        if not vocab_path:
            self.word_transformer = dict()
            self.vocab_size = -1
            return
        self.word_transformer = load_file_json(vocab_path)
        # One extra slot for id 0.
        self.vocab_size = len(self.word_transformer) + 1

    def tokenize(self, data):
        """Split every text in *data* into a list of NLTK word tokens."""
        return [list(word_tokenize(txt)) for txt in data]

    def make_word_transformar(self, train_data:list):
        """Count words in *train_data* and index those seen at least twice.

        Ids are assigned by descending frequency, starting at 1.
        """
        for sentence in train_data:
            for token in sentence:
                self.word_count[token] += 1
        ranked = sorted(self.word_count.items(), key=lambda item: item[1], reverse=True)
        for rank, (token, freq) in enumerate(ranked, start=1):
            if freq < 2:
                # Sorted by frequency, so everything after this is rare too.
                break
            self.word_transformer[token] = rank
        self.vocab_size = len(self.word_transformer) + 1

    def txt2ids(self, txt_list:list):
        """Convert tokenised texts into lists of ids (0 for unknown words)."""
        return [
            [self.word_transformer.get(token, 0) for token in txt]
            for txt in txt_list
        ]


    def ids2vec(self, txt_ids:list):
        """Convert id sequences into arrays of one-hot rows."""
        one_hot_rows = np.identity(self.vocab_size)
        return [one_hot_rows[ids] for ids in txt_ids]

In [268]:
# Create the preprocessing helper with an empty vocabulary, read the three
# tab-separated splits, and tokenise every text.
preprocess = PreprocessTools()
x_train, y_train = load_data('data/train.txt')
x_valid, y_valid = load_data('data/valid.txt')
x_test, y_test = load_data('data/test.txt')
x_train = preprocess.tokenize(x_train)
x_valid = preprocess.tokenize(x_valid)
x_test = preprocess.tokenize(x_test)

In [269]:
# Build the vocabulary from the training split only (words seen at least twice).
preprocess.make_word_transformar(x_train)

In [270]:
# Map every token of every split to its id; words outside the vocabulary become 0.
x_train_ids = preprocess.txt2ids(x_train)
x_valid_ids = preprocess.txt2ids(x_valid)
x_test_ids = preprocess.txt2ids(x_test)

In [271]:
# Show the first ten tokenised training texts next to their id sequences.
# Note that `word` holds a whole token list here, not a single word.
for word, ids in zip(x_train[:10], x_train_ids[:10]):
    print(word, ids)

['Kindred', 'Healthcare', 'to', 'buy', 'Gentiva', 'for', 'about', '$', '573', 'mln'] [5065, 3395, 2, 181, 3396, 13, 164, 19, 0, 220]
['US', 'to', 'boost', 'ground', ',', 'naval', 'forces', 'in', 'NATO', 'countries'] [15, 2, 586, 3397, 1, 0, 4067, 6, 5066, 5067]
['Robert', 'Pattinson', '-', 'Robert', 'Pattinson', 'Brushes', 'Off', 'Kristen', 'Stewart', "'s", 'Cheating', '...'] [237, 945, 11, 237, 945, 0, 385, 669, 1259, 4, 3398, 3]
['Piers', 'Morgan', 'Delivers', 'One', 'Final', 'Blow', 'To', 'Gun', 'Violence', 'In', 'Last', 'Show'] [5068, 399, 6690, 185, 1074, 5069, 16, 3399, 2225, 20, 785, 161]
['Here', 'We', 'Go', ':', "'Star", 'Wars', 'Episode', 'VII', "'", 'Kicks', 'Off', 'Filming', 'at', 'Pinewood'] [400, 196, 639, 7, 549, 210, 295, 587, 5, 5070, 385, 1371, 22, 0]
['Amazon', 'gets', 'in', 'the', 'game', ':', 'Retailer', 'beats', 'Google', 'to', 'buy', 'hit', 'console', 'broadcasting', '...'] [169, 330, 6, 17, 1619, 7, 0, 609, 82, 2, 181, 245, 5071, 0, 3]
['FOREX-Euro', 'retreats',

In [272]:
# Vocabulary size: number of indexed words plus one slot for id 0.
print(preprocess.vocab_size)

9866


In [273]:
# Persist the word -> id mapping as JSON.
save_file_json('work/vocab.json', preprocess.word_transformer)

# 81. RNNによる予測

In [274]:
# Do not convert the ids to one-hot vectors
#xtrain_vec = preprocess.ids2vec(xtrain_ids) 
#xvalid_vec = preprocess.ids2vec(xvalid_ids)
#xtest_vec = preprocess.ids2vec(xtest_ids)

In [275]:
# Replace the category letters of each split with integer class ids.
y_train = chr2num(y_train)
y_valid = chr2num(y_valid)
y_test = chr2num(y_test)

In [276]:
# Write the id sequences and labels of every split to JSON, each wrapped
# under a 'data' key.
save_file_json('work/train_x.json', {'data':x_train_ids})
save_file_json('work/train_y.json', {'data': y_train})
save_file_json('work/valid_x.json', {'data': x_valid_ids})
save_file_json('work/valid_y.json', {'data': y_valid})
save_file_json('work/test_x.json', {'data': x_test_ids})
save_file_json('work/test_y.json', {'data': y_test})

In [277]:
# Reload the splits from JSON. From here on x_* hold id sequences (not
# tokens) as nested lists, and y_* are NumPy arrays of class ids.
x_train = load_file_json('work/train_x.json')['data']
y_train = np.asarray(load_file_json('work/train_y.json')['data'])
x_valid = load_file_json('work/valid_x.json')['data']
y_valid = np.asarray(load_file_json('work/valid_y.json')['data'])
x_test = load_file_json('work/test_x.json')['data']
y_test = np.asarray(load_file_json('work/test_y.json')['data'])

In [278]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Inputs: input, h_0

- input of shape (seq_len, batch, input_size): tensor containing the features of the input sequence. The input can also be a packed variable length sequence. See torch.nn.utils.rnn.pack_padded_sequence() or torch.nn.utils.rnn.pack_sequence() for details.

- h_0 of shape (num_layers * num_directions, batch, hidden_size): tensor containing the initial hidden state for each element in the batch. Defaults to zero if not provided. If the RNN is bidirectional, num_directions should be 2, else it should be 1.

## Outputs: output, h_n

- output of shape (seq_len, batch, num_directions * hidden_size): tensor containing the output features (h_t) from the last layer of the RNN, for each t. If a torch.nn.utils.rnn.PackedSequence has been given as the input, the output will also be a packed sequence.

    For the unpacked case, the directions can be separated using output.view(seq_len, batch, num_directions, hidden_size), with forward and backward being direction 0 and 1 respectively. Similarly, the directions can be separated in the packed case.

- h_n of shape (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for t = seq_len.

    Like output, the layers can be separated using h_n.view(num_layers, num_directions, batch, hidden_size).

In [106]:
# Stand-alone embedding / RNN / linear / log-softmax layers, wired by hand
# for a quick shape check on a toy batch.
vocab_size = preprocess.vocab_size
dw, dh = 300, 50  # embedding size, RNN hidden size
torch.manual_seed(1234)
embed = nn.Embedding(vocab_size, dw, padding_idx=0) # index 0 is reserved for zero padding
rnn = nn.RNN(dw, dh, num_layers=2, bidirectional=False, batch_first=True)
linear = nn.Linear(50, 4, bias=True)  # 50 equals dh above
softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1
# NOTE(review): `input` shadows the builtin of the same name; the next cell reads it.
input = torch.LongTensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])

In [107]:
# Embed the toy batch, run the RNN, and classify from the last entry of
# the final hidden state.
output, hn = rnn(embed(input))
print(hn.shape)
x = linear(hn[-1])  # hn[-1] selects the last entry along the first axis of hn
print(x)
print(softmax(x))

torch.Size([4, 2, 50])
tensor([[ 0.3656,  0.1161, -0.2724,  0.4356],
        [ 0.1910,  0.0066, -0.2242,  0.0248]], grad_fn=<AddmmBackward>)
tensor([[-1.2179, -1.4674, -1.8559, -1.1479],
        [-1.2056, -1.3900, -1.6208, -1.3718]], grad_fn=<LogSoftmaxBackward>)


# 82. 確率的勾配降下法による学習

In [242]:
class MyRNN(torch.nn.Module):
    """Text classifier: embedding -> ReLU RNN -> linear -> log-softmax.

    Args:
        vocab_size: Number of rows of the embedding table (unused when
            ``PATH`` supplies the weights).
        dw: Embedding size, which is also the RNN input size.
        dh: RNN hidden size.
        L: Number of output classes.
        num_layers: Number of stacked RNN layers.
        bidirectional: Whether the RNN also reads the sequence backwards.
        rnn_bias: Whether the RNN layers use bias terms.
        PATH: Optional pre-trained embedding weights, given either as a 2-D
            float tensor or as a path that ``torch.load`` resolves to one.

    Raises:
        ValueError: If the pre-trained weights are not of shape ``(*, dw)``.
    """

    def __init__(self, vocab_size, dw=300, dh=50, L=4, num_layers=1, bidirectional=False, rnn_bias=True, PATH=None):
        super().__init__()
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.dw, self.dh = dw, dh
        # The isinstance test comes first because truth-testing a multi-element
        # tensor raises.
        if isinstance(PATH, torch.Tensor) or PATH:
            # `torch.nn` has no `from_pretrained`; the constructor lives on
            # nn.Embedding and expects the weight tensor itself.
            weights = PATH if isinstance(PATH, torch.Tensor) else torch.load(PATH)
            if weights.dim() != 2 or weights.size(1) != dw:
                raise ValueError(
                    'pre-trained embeddings must have shape (vocab, %d), got %s'
                    % (dw, tuple(weights.shape))
                )
            # Keep the table trainable and reserve row 0 for padding, like the
            # randomly initialised branch below.
            self.embed = nn.Embedding.from_pretrained(weights, freeze=False, padding_idx=0)
        else:
            m = nn.Embedding(vocab_size, dw, padding_idx=0)
            nn.init.normal_(m.weight, mean=0, std=dw ** -0.5)
            # normal_ also overwrote the padding row, so zero it again.
            nn.init.constant_(m.weight[0], 0)
            self.embed = m
        self.rnn = nn.RNN(dw, dh, bias=rnn_bias, num_layers=num_layers, bidirectional=bidirectional, batch_first=True, nonlinearity='relu')
        if bidirectional:
            self.linear = nn.Linear(2 * dh, L, bias=True)
        else:
            self.linear = nn.Linear(dh, L, bias=True)
        self.softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, L)``.

        Args:
            x: LongTensor of word ids (not one-hot vectors), shape
                ``(batch, seq_len)``.
        """
        x = self.embed(x)
        _, hidden = self.rnn(x)
        # Split the fused first axis into (layer, direction).
        hidden = hidden.view(self.num_layers, 2 if self.bidirectional else 1, -1, self.dh)
        last_hidden = hidden[-1]  # final states of the top layer
        # NOTE(review): padded batches are fed without packing, so for shorter
        # sequences the final state is taken after their trailing padding steps.
        if self.bidirectional:
            x = self.linear(torch.cat([last_hidden[0], last_hidden[1]], dim=1))
        else:
            x = self.linear(last_hidden[0])
        x = self.softmax(x)
        return x

    def update_from_word2vec(self, w2v, transformer):
        """Overwrite embedding rows with vectors from *w2v* where available.

        Args:
            w2v: Container supporting ``word in w2v`` and ``w2v[word]``, the
                latter returning a NumPy vector of length ``dw``.
            transformer: Mapping from word to embedding row index.
        """
        with torch.no_grad():
            for word, idx in transformer.items():
                if word in w2v:
                    self.embed.weight[idx].copy_(torch.from_numpy(w2v[word]))

In [109]:
from torch.utils.tensorboard import SummaryWriter
import tqdm
from torch.utils.data import DataLoader

In [110]:
class MyDataSets(torch.utils.data.Dataset):
    """In-memory dataset pairing each id sequence with its label.

    Both members of a sample are LongTensors; the label is wrapped in a
    one-element tensor.
    """

    def __init__(self, x, y):
        self.x = list(map(torch.LongTensor, x))
        self.y = [torch.LongTensor([label]) for label in y]

    def __len__(self):
        """Number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` pair stored at position *idx*."""
        sample = (self.x[idx], self.y[idx])
        return sample

In [111]:
def collate_fn(batch):
    """Combine ``(sequence, label)`` samples into one padded batch.

    Returns:
        ``(x, y)`` where ``x`` stacks the sequences, padded to the length of
        the longest one, and ``y`` is a LongTensor with one label per sample.
    """
    sequences = []
    for sample in batch:
        sequences.append(sample[0])
    padded = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    labels = []
    for sample in batch:
        labels.append(sample[1])
    return padded, torch.LongTensor(labels)

In [112]:
def execution(data_x, data_y, op, criterion, model, batch_size=1, is_train=True, use_gpu=False):
    """Run one pass over the data and report mean loss and accuracy.

    Args:
        data_x: Sequence of id sequences.
        data_y: Sequence of integer class labels, parallel to *data_x*.
        op: Optimizer; only stepped when *is_train* is true.
        criterion: Loss called as ``criterion(model_output, labels)``.
        model: Module mapping a padded id batch to per-class scores.
        batch_size: Number of samples per mini-batch.
        is_train: Update the parameters when true, otherwise only evaluate.
        use_gpu: Move every batch to the GPU before the forward pass. The
            model must already be on the GPU.

    Returns:
        ``(mean_loss, accuracy)``, with the loss averaged per sample and the
        accuracy given in percent.
    """
    if is_train:
        model.train()
    else:
        model.eval()
    ndata = len(data_x)
    dataset = MyDataSets(data_x, data_y)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    sum_loss, acc_score = 0.0, 0
    # Autograd is only needed when the parameters are going to be updated.
    with torch.set_grad_enabled(is_train):
        for batch_x, batch_y in data_loader:
            if use_gpu:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
            out = model(batch_x)
            loss = criterion(out, batch_y)
            if is_train:
                op.zero_grad()
                loss.backward()
                op.step()
            # The criterion yields a per-batch value, so weight it by batch
            # size to get a per-sample mean at the end.
            sum_loss += loss.item() * len(batch_x)
            pred = torch.argmax(out, dim=1)
            acc_score += (pred == batch_y).sum().item()
    return sum_loss / ndata, acc_score / ndata * 100

In [163]:
# Reload the id sequences (nested lists) and labels (NumPy arrays) of every
# split from the JSON files written earlier.
x_train = load_file_json('work/train_x.json')['data']
y_train = np.asarray(load_file_json('work/train_y.json')['data'])
x_valid = load_file_json('work/valid_x.json')['data']
y_valid = np.asarray(load_file_json('work/valid_y.json')['data'])
x_test = load_file_json('work/test_x.json')['data']
y_test = np.asarray(load_file_json('work/test_y.json')['data'])

In [164]:
# Configuration for plain SGD training: a single-layer unidirectional RNN
# updated on one sample at a time.
vocab_size = preprocess.vocab_size
torch.manual_seed(1234)  # fixed seed for reproducible initialisation
model = MyRNN(vocab_size, dw=300, dh=50, L=4, num_layers=1, bidirectional=False)
ntrain = len(x_train)
nepoch = 10
batch_size = 1
op = optim.SGD(model.parameters(), lr=0.1)
#op = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()  # pairs with the model's LogSoftmax output

In [165]:
# Print the module structure.
print(model)

MyRNN(
  (embed): Embedding(9866, 300, padding_idx=0)
  (rnn): RNN(300, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=4, bias=True)
  (softmax): LogSoftmax()
)


In [166]:
# Train for `nepoch` epochs, evaluating on the validation split after each
# one. Loss and accuracy go to TensorBoard (separate train/valid runs) and
# are also collected in `logger` and printed.
train_writer = SummaryWriter(log_dir='./work/logs/train')
valid_writer = SummaryWriter(log_dir='./work/logs/valid')
logger = list()
for epoch in tqdm.notebook.tqdm(range(nepoch)):
    train_loss, train_acc = execution(x_train, y_train, op, criterion, model, batch_size=batch_size)
    train_writer.add_scalar("loss", train_loss, epoch)
    train_writer.add_scalar("accuracy", train_acc, epoch)
    # No gradients are needed for the validation pass.
    with torch.no_grad():
        valid_loss, valid_acc = execution(x_valid, y_valid, op, criterion, model, batch_size=batch_size, is_train=False)
        valid_writer.add_scalar("loss", valid_loss, epoch)
        valid_writer.add_scalar("accuracy", valid_acc, epoch)
    logger.append({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
    print({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
train_writer.close()
valid_writer.close()

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

{'epoch': 0, 'train_loss': 1.1710304718339042, 'train_acc': 41.08614232209738, 'valid_loss': 1.161880460214079, 'valid_acc': 42.54681647940075}
{'epoch': 1, 'train_loss': 1.1614205118422205, 'train_acc': 42.537453183520604, 'valid_loss': 1.163245714112614, 'valid_acc': 49.13857677902622}
{'epoch': 2, 'train_loss': 1.1620618166548482, 'train_acc': 41.21722846441948, 'valid_loss': 1.1617270750945874, 'valid_acc': 42.54681647940075}
{'epoch': 3, 'train_loss': 1.161140552710058, 'train_acc': 41.947565543071164, 'valid_loss': 1.1605979920326548, 'valid_acc': 43.37078651685393}
{'epoch': 4, 'train_loss': 1.1606137023882919, 'train_acc': 42.79962546816479, 'valid_loss': 1.162265390492557, 'valid_acc': 47.041198501872664}
{'epoch': 5, 'train_loss': 1.159734628352333, 'train_acc': 42.930711610486895, 'valid_loss': 1.1590327347709, 'valid_acc': 43.37078651685393}
{'epoch': 6, 'train_loss': 1.1581479981597442, 'train_acc': 44.04494382022472, 'valid_loss': 1.1556194185764155, 'valid_acc': 48.68913

# 83. ミニバッチ化・GPU上での学習

In [188]:
%%file src/gpu.py

import numpy as np
from functools import reduce
from nltk.tokenize import word_tokenize
from collections import defaultdict
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from torch.utils.data import DataLoader

class PreprocessTools:
    """Tokenisation and vocabulary helpers that turn texts into id sequences.

    Words present in the vocabulary get ids starting at 1; every other word
    is mapped to id 0.
    """

    def __init__(self, vocab_path=None):
        """Start with an empty vocabulary, or load one from a JSON file.

        Args:
            vocab_path: Optional path of a JSON word -> id mapping. When
                omitted the vocabulary is empty and ``vocab_size`` is -1
                until ``make_word_transformar`` is called.
        """
        self.word_count = defaultdict(int)
        if vocab_path:
            self.word_transformer = load_file_json(vocab_path)
            self.vocab_size = len(self.word_transformer) + 1
        else:
            self.word_transformer = defaultdict(int)
            self.vocab_size = -1

    def tokenize(self, data):
        """Split every text in *data* into a list of NLTK word tokens."""
        return [list(word_tokenize(txt)) for txt in data]

    def make_word_transformar(self, train_data:list):
        """Count words in *train_data* and index those seen at least twice.

        Ids are assigned by descending frequency, starting at 1.
        """
        for data in train_data:
            for word in data:
                self.word_count[word] += 1
        sorted_word_count = sorted(self.word_count.items(), key=lambda x: x[1], reverse=True)
        for idx, (word, count) in enumerate(sorted_word_count):
            if count < 2:
                # Sorted by frequency, so everything after this is rare too.
                break
            self.word_transformer[word] = idx + 1
        self.vocab_size = len(self.word_transformer) + 1

    def txt2ids(self, txt_list:list):
        """Convert tokenised texts into lists of ids (0 for unknown words).

        The lookup uses ``get`` rather than indexing: indexing a defaultdict
        would insert every unknown word into the vocabulary, and indexing a
        plain dict loaded from JSON would raise ``KeyError``.
        """
        lookup = self.word_transformer.get
        return [[lookup(word, 0) for word in txt] for txt in txt_list]

    def ids2vec(self, txt_ids:list):
        """Convert id sequences into arrays of one-hot rows."""
        identity = np.identity(self.vocab_size)
        return [identity[ids] for ids in txt_ids]
    
    
def load_data(path):
    """Load ``text<TAB>label`` records from a text file.

    Args:
        path: Path of the file to read.

    Returns:
        ``(X, Y)``: the list of first fields and the list of second fields,
        in file order.

    Raises:
        IndexError: If a non-blank line has no tab separator.
    """
    X = list()
    Y = list()
    # An explicit encoding keeps the script independent of the host locale.
    with open(path, mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines instead of failing on the missing field.
                continue
            splited_line = line.split('\t')
            X.append(splited_line[0])
            Y.append(splited_line[1])
    return X, Y

def save_file_json(path, data):
    """Dump *data* as JSON on a single newline-terminated line in *path*."""
    with open(path, mode='w') as target:
        target.writelines([json.dumps(data), '\n'])
        
def load_file_json(path):
    """Read *path* and return the decoded JSON value."""
    with open(path, mode='r') as handle:
        return json.load(handle)

def chr2num(y):
    """Encode category letters as class ids: b=0, t=1, e=2, m=3."""
    class_of = {'b': 0, 't': 1, 'e': 2, 'm': 3}
    # Plain indexing, so an unexpected letter raises KeyError.
    return list(map(class_of.__getitem__, y))

class MyRNN(torch.nn.Module):
    """Text classifier: embedding -> tanh RNN -> linear -> log-softmax.

    Args:
        vocab_size: Number of rows of the embedding table (unused when
            ``PATH`` supplies the weights).
        dw: Embedding size, which is also the RNN input size.
        dh: RNN hidden size.
        L: Number of output classes.
        num_layers: Number of stacked RNN layers.
        bidirectional: Whether the RNN also reads the sequence backwards.
        rnn_bias: Whether the RNN layers use bias terms.
        PATH: Optional pre-trained embedding weights, given either as a 2-D
            float tensor or as a path that ``torch.load`` resolves to one.

    Raises:
        ValueError: If the pre-trained weights are not of shape ``(*, dw)``.
    """

    def __init__(self, vocab_size, dw=300, dh=50, L=4, num_layers=1, bidirectional=False, rnn_bias=True, PATH=None):
        super().__init__()
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.dw, self.dh = dw, dh
        # The isinstance test comes first because truth-testing a multi-element
        # tensor raises.
        if isinstance(PATH, torch.Tensor) or PATH:
            # `torch.nn` has no `from_pretrained`; the constructor lives on
            # nn.Embedding and expects the weight tensor itself.
            weights = PATH if isinstance(PATH, torch.Tensor) else torch.load(PATH)
            if weights.dim() != 2 or weights.size(1) != dw:
                raise ValueError(
                    'pre-trained embeddings must have shape (vocab, %d), got %s'
                    % (dw, tuple(weights.shape))
                )
            # Keep the table trainable and reserve row 0 for padding, like the
            # randomly initialised branch below.
            self.embed = nn.Embedding.from_pretrained(weights, freeze=False, padding_idx=0)
        else:
            m = nn.Embedding(vocab_size, dw, padding_idx=0)
            nn.init.normal_(m.weight, mean=0, std=dw ** -0.5)
            # normal_ also overwrote the padding row, so zero it again.
            nn.init.constant_(m.weight[0], 0)
            self.embed = m
        self.rnn = nn.RNN(dw, dh, bias=rnn_bias, num_layers=num_layers, bidirectional=bidirectional, batch_first=True, nonlinearity='tanh')
        if bidirectional:
            self.linear = nn.Linear(2 * dh, L, bias=True)
        else:
            self.linear = nn.Linear(dh, L, bias=True)
        self.softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, L)``.

        Args:
            x: LongTensor of word ids (not one-hot vectors), shape
                ``(batch, seq_len)``.
        """
        x = self.embed(x)
        _, hidden = self.rnn(x)
        # Split the fused first axis into (layer, direction).
        hidden = hidden.view(self.num_layers, 2 if self.bidirectional else 1, -1, self.dh)
        last_hidden = hidden[-1]  # final states of the top layer
        if self.bidirectional:
            x = self.linear(torch.cat([last_hidden[0], last_hidden[1]], dim=1))
        else:
            x = self.linear(last_hidden[0])
        x = self.softmax(x)
        return x

class MyDataSets(torch.utils.data.Dataset):
    """Dataset of ``(id sequence, label)`` pairs held in memory as LongTensors."""

    def __init__(self, x, y):
        sequences = []
        for ids in x:
            sequences.append(torch.LongTensor(ids))
        labels = []
        for label in y:
            # Each label is stored as a one-element tensor.
            labels.append(torch.LongTensor([label]))
        self.x = sequences
        self.y = labels

    def __len__(self):
        """Number of stored samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the sequence and label tensors at position *idx*."""
        sequence = self.x[idx]
        label = self.y[idx]
        return sequence, label
    
def collate_fn(batch):
    """Pad the sequences of *batch* to equal length and gather the labels.

    Returns:
        ``(x, y)``: the padded sequences stacked batch-first, and a
        LongTensor holding one label per sample.
    """
    token_ids = [sample[0] for sample in batch]
    padded = nn.utils.rnn.pad_sequence(token_ids, batch_first=True)
    targets = [sample[1] for sample in batch]
    return padded, torch.LongTensor(targets)

    
def execution(data_x, data_y, op, criterion, model, batch_size=1, is_train=True, use_gpu=False):
    """Run one pass over the data; return ``(mean loss, accuracy in percent)``.

    When *is_train* is true the optimizer *op* is stepped after every batch;
    otherwise the model is only evaluated. With *use_gpu* every batch is
    moved to the GPU before the forward pass.
    """
    if is_train:
        model.train()
    else:
        model.eval()
    total = len(data_x)
    loader = DataLoader(
        MyDataSets(data_x, data_y),
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
    )
    loss_total = 0
    n_correct = 0
    for inputs, targets in loader:
        op.zero_grad()
        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda()
        log_probs = model(inputs)
        batch_loss = criterion(log_probs, targets)
        if is_train:
            batch_loss.backward()
            op.step()
        # Weight the per-batch loss by batch size for a per-sample mean.
        loss_total += batch_loss.data.item() * len(inputs)
        predicted = torch.argmax(log_probs, dim=1)
        hits = (predicted == targets).cpu().detach().numpy()
        n_correct += np.sum(hits)
    return loss_total / total, n_correct / total * 100


if __name__ == "__main__":
    # Mini-batch training of a two-layer bidirectional RNN on the saved id
    # sequences, using the GPU when one is available.
    preprocess = PreprocessTools('work/vocab.json')

    x_train = load_file_json('work/train_x.json')['data']
    y_train = np.asarray(load_file_json('work/train_y.json')['data'])
    x_valid = load_file_json('work/valid_x.json')['data']
    y_valid = np.asarray(load_file_json('work/valid_y.json')['data'])
    x_test = load_file_json('work/test_x.json')['data']
    y_test = np.asarray(load_file_json('work/test_y.json')['data'])

    # Fall back to the CPU when CUDA is not available.
    use_gpu = torch.cuda.is_available()

    vocab_size = preprocess.vocab_size
    torch.manual_seed(1234)
    model = MyRNN(vocab_size, dw=300, dh=50, L=4, num_layers=2, bidirectional=True)
    if use_gpu:
        # Move the model before building the optimizer so the optimizer
        # tracks the GPU parameters.
        model = model.cuda()
    ntrain = len(x_train)
    nepoch = 10
    batch_size = 128
    op = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.NLLLoss()

    train_writer = SummaryWriter(log_dir='./work/logs/train')
    valid_writer = SummaryWriter(log_dir='./work/logs/valid')
    logger = list()
    # `tqdm` is imported as `from tqdm import tqdm`, so it is called directly.
    for epoch in tqdm(range(nepoch)):
        train_loss, train_acc = execution(x_train, y_train, op, criterion, model, batch_size=batch_size, use_gpu=use_gpu)
        train_writer.add_scalar("loss", train_loss, epoch)
        train_writer.add_scalar("accuracy", train_acc, epoch)
        # No gradients are needed for the validation pass.
        with torch.no_grad():
            valid_loss, valid_acc = execution(x_valid, y_valid, op, criterion, model, batch_size=batch_size, is_train=False, use_gpu=use_gpu)
            valid_writer.add_scalar("loss", valid_loss, epoch)
            valid_writer.add_scalar("accuracy", valid_acc, epoch)
        logger.append({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
        print({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
    train_writer.close()
    valid_writer.close()

Overwriting src/gpu.py


# 84. 単語ベクトルの導入

In [279]:
from gensim.models import KeyedVectors

In [280]:
# Load word vectors from the binary word2vec-format file.
w2v = KeyedVectors.load_word2vec_format('data/GoogleNews-vectors-negative300.bin', binary=True)

In [314]:
# Same single-layer RNN as before, but with the embedding rows of known
# words overwritten by word2vec vectors, trained with Adam on mini-batches.
vocab_size = preprocess.vocab_size
torch.manual_seed(1234)
model = MyRNN(vocab_size, dw=300, dh=50, L=4, num_layers=1, bidirectional=False)
model.update_from_word2vec(w2v, preprocess.word_transformer)
ntrain = len(x_train)
nepoch = 10
batch_size = 128
#op = optim.SGD(model.parameters(), lr=0.1)
op = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()  # pairs with the model's LogSoftmax output

In [315]:
# Print the module structure.
print(model)

MyRNN(
  (embed): Embedding(9866, 300, padding_idx=0)
  (rnn): RNN(300, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=4, bias=True)
  (softmax): LogSoftmax()
)


In [316]:
# Train for `nepoch` epochs with a validation pass after each one. Metrics
# go to TensorBoard, to `logger`, and to stdout.
train_writer = SummaryWriter(log_dir='./work/logs/train')
valid_writer = SummaryWriter(log_dir='./work/logs/valid')
logger = list()
for epoch in tqdm.notebook.tqdm(range(nepoch)):
    train_loss, train_acc = execution(x_train, y_train, op, criterion, model, batch_size=batch_size)
    train_writer.add_scalar("loss", train_loss, epoch)
    train_writer.add_scalar("accuracy", train_acc, epoch)
    # No gradients are needed for the validation pass.
    with torch.no_grad():
        valid_loss, valid_acc = execution(x_valid, y_valid, op, criterion, model, batch_size=batch_size, is_train=False)
        valid_writer.add_scalar("loss", valid_loss, epoch)
        valid_writer.add_scalar("accuracy", valid_acc, epoch)
    logger.append({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
    print({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
train_writer.close()
valid_writer.close()

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

{'epoch': 0, 'train_loss': 1.0841823437687164, 'train_acc': 50.468164794007485, 'valid_loss': 0.8035261861840437, 'valid_acc': 73.85767790262172}
{'epoch': 1, 'train_loss': 0.5676666072245394, 'train_acc': 79.05430711610487, 'valid_loss': 0.46076675584253746, 'valid_acc': 83.07116104868913}
{'epoch': 2, 'train_loss': 0.3240814438919896, 'train_acc': 87.34082397003745, 'valid_loss': 0.45489098219835805, 'valid_acc': 83.59550561797752}
{'epoch': 3, 'train_loss': 0.2447720053490628, 'train_acc': 89.84082397003745, 'valid_loss': 0.41074375164196286, 'valid_acc': 85.0187265917603}
{'epoch': 4, 'train_loss': 0.20364386380723354, 'train_acc': 90.53370786516854, 'valid_loss': 0.4525810466946734, 'valid_acc': 85.2434456928839}
{'epoch': 5, 'train_loss': 0.16895339485634578, 'train_acc': 91.44194756554307, 'valid_loss': 0.39696553704443943, 'valid_acc': 85.76779026217228}
{'epoch': 6, 'train_loss': 0.14188933785488542, 'train_acc': 92.61235955056179, 'valid_loss': 0.4342009868291433, 'valid_acc'

# 85. 双方向RNN・多層化

In [317]:
class MyRNN(torch.nn.Module):
    """Text classifier: embedding -> ReLU RNN with dropout -> linear -> log-softmax.

    Args:
        vocab_size: Number of rows of the embedding table (unused when
            ``PATH`` supplies the weights).
        dw: Embedding size, which is also the RNN input size.
        dh: RNN hidden size.
        L: Number of output classes.
        num_layers: Number of stacked RNN layers.
        bidirectional: Whether the RNN also reads the sequence backwards.
        dropout: Dropout probability passed to ``nn.RNN``.
        rnn_bias: Whether the RNN layers use bias terms.
        PATH: Optional pre-trained embedding weights, given either as a 2-D
            float tensor or as a path that ``torch.load`` resolves to one.

    Raises:
        ValueError: If the pre-trained weights are not of shape ``(*, dw)``.
    """

    def __init__(self, vocab_size, dw=300, dh=50, L=4, num_layers=1, bidirectional=False, dropout=0.0, rnn_bias=True, PATH=None):
        super().__init__()
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.dw, self.dh = dw, dh
        # The isinstance test comes first because truth-testing a multi-element
        # tensor raises.
        if isinstance(PATH, torch.Tensor) or PATH:
            # `torch.nn` has no `from_pretrained`; the constructor lives on
            # nn.Embedding and expects the weight tensor itself.
            weights = PATH if isinstance(PATH, torch.Tensor) else torch.load(PATH)
            if weights.dim() != 2 or weights.size(1) != dw:
                raise ValueError(
                    'pre-trained embeddings must have shape (vocab, %d), got %s'
                    % (dw, tuple(weights.shape))
                )
            # Keep the table trainable and reserve row 0 for padding, like the
            # randomly initialised branch below.
            self.embed = nn.Embedding.from_pretrained(weights, freeze=False, padding_idx=0)
        else:
            m = nn.Embedding(vocab_size, dw, padding_idx=0)
            nn.init.normal_(m.weight, mean=0, std=dw ** -0.5)
            # normal_ also overwrote the padding row, so zero it again.
            nn.init.constant_(m.weight[0], 0)
            self.embed = m
        self.rnn = nn.RNN(dw, dh, bias=rnn_bias, num_layers=num_layers, bidirectional=bidirectional, batch_first=True, nonlinearity='relu', dropout=dropout)
        if bidirectional:
            self.linear = nn.Linear(2 * dh, L, bias=True)
        else:
            self.linear = nn.Linear(dh, L, bias=True)
        self.softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, L)``.

        Args:
            x: LongTensor of word ids (not one-hot vectors), shape
                ``(batch, seq_len)``.
        """
        x = self.embed(x)
        _, hidden = self.rnn(x)
        # Split the fused first axis into (layer, direction).
        hidden = hidden.view(self.num_layers, 2 if self.bidirectional else 1, -1, self.dh)
        last_hidden = hidden[-1]  # final states of the top layer
        if self.bidirectional:
            x = self.linear(torch.cat([last_hidden[0], last_hidden[1]], dim=1))
        else:
            x = self.linear(last_hidden[0])
        x = self.softmax(x)
        return x

    def update_from_word2vec(self, w2v, transformer):
        """Overwrite embedding rows with vectors from *w2v* where available.

        Args:
            w2v: Container supporting ``word in w2v`` and ``w2v[word]``, the
                latter returning a NumPy vector of length ``dw``.
            transformer: Mapping from word to embedding row index.
        """
        with torch.no_grad():
            for word, idx in transformer.items():
                if word in w2v:
                    self.embed.weight[idx].copy_(torch.from_numpy(w2v[word]))

In [324]:
# Two-layer bidirectional RNN with dropout, initialised from word2vec and
# trained with Adagrad on mini-batches of 64 for 30 epochs.
vocab_size = preprocess.vocab_size
torch.manual_seed(1234)
model = MyRNN(vocab_size, dw=300, dh=50, L=4, num_layers=2, bidirectional=True, dropout=0.6)
model.update_from_word2vec(w2v, preprocess.word_transformer)
ntrain = len(x_train)
nepoch = 30
batch_size = 64
#op = optim.SGD(model.parameters(), lr=0.01, weight_decay=0.001)
op = optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0.001)
#op = optim.Adam(model.parameters(), lr=0.001)
#nn.utils.clip_grad_norm_(model.parameters(), 0.01)
criterion = nn.NLLLoss()  # pairs with the model's LogSoftmax output
train_writer = SummaryWriter(log_dir='./work/logs/train')
valid_writer = SummaryWriter(log_dir='./work/logs/valid')
logger = list()
for epoch in tqdm.notebook.tqdm(range(nepoch)):
    train_loss, train_acc = execution(x_train, y_train, op, criterion, model, batch_size=batch_size)
    train_writer.add_scalar("loss", train_loss, epoch)
    train_writer.add_scalar("accuracy", train_acc, epoch)
    # No gradients are needed for the validation pass.
    with torch.no_grad():
        valid_loss, valid_acc = execution(x_valid, y_valid, op, criterion, model, batch_size=batch_size, is_train=False)
        valid_writer.add_scalar("loss", valid_loss, epoch)
        valid_writer.add_scalar("accuracy", valid_acc, epoch)
    logger.append({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
    print({'epoch':epoch, 'train_loss':train_loss, 'train_acc':train_acc, 'valid_loss':valid_loss, 'valid_acc':valid_acc})
train_writer.close()
valid_writer.close()

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

{'epoch': 0, 'train_loss': 0.5214494584874714, 'train_acc': 80.32771535580524, 'valid_loss': 0.40309572698694934, 'valid_acc': 84.34456928838952}
{'epoch': 1, 'train_loss': 0.2972070640392518, 'train_acc': 88.65168539325843, 'valid_loss': 0.3528627883182483, 'valid_acc': 86.81647940074907}
{'epoch': 2, 'train_loss': 0.19832998494083962, 'train_acc': 93.04307116104869, 'valid_loss': 0.3388892986354756, 'valid_acc': 88.91385767790261}
{'epoch': 3, 'train_loss': 0.129997069045399, 'train_acc': 95.76779026217228, 'valid_loss': 0.3542939303519574, 'valid_acc': 89.8876404494382}
{'epoch': 4, 'train_loss': 0.08409301009071007, 'train_acc': 97.35955056179775, 'valid_loss': 0.3383731834450911, 'valid_acc': 90.56179775280899}
{'epoch': 5, 'train_loss': 0.06061504503035367, 'train_acc': 98.14606741573034, 'valid_loss': 0.390652206640565, 'valid_acc': 91.08614232209737}
{'epoch': 6, 'train_loss': 0.04411436755642686, 'train_acc': 98.70786516853933, 'valid_loss': 0.41162516456864745, 'valid_acc': 9

# 86. 畳み込みニューラルネットワーク (CNN)

In [405]:
# Scratch setup for experimenting with a convolution over embedded tokens.
vocab_size = preprocess.vocab_size
dw, dh = 300, 50
torch.manual_seed(1234)
embed = nn.Embedding(vocab_size, dw, padding_idx=0) # index 0 is reserved for zero padding
# NOTE(review): this yields a 900 x 50 kernel; confirm it is the intended window.
kernel_size = [3 * dw, dh]
cnn = nn.Conv2d(1, 1, kernel_size, padding=0, stride=1, padding_mode='replicate')
linear = nn.Linear(50, 4, bias=True)  # 50 equals dh above
softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1
# NOTE(review): `input` shadows the builtin of the same name; a later cell reads it.
input = torch.LongTensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])

In [406]:
# Inspect the shape of the convolution kernel.
print(cnn.weight.shape)

torch.Size([1, 1, 900, 50])


In [416]:
# Embed the toy batch and check the size of two concatenated word vectors
# taken from the first sequence.
x = embed(input)
torch.cat([x[0][0], x[0][1]]).shape
#print(cnn(x).shape)

torch.Size([600])

In [411]:
class MyCNN(torch.nn.Module):
    """Text classifier: embedding -> 1-D convolution -> max-over-time -> linear -> log-softmax.

    The convolution uses a window of three tokens, stride 1, and zero padding
    of one position on each side, followed by a ReLU.

    Args:
        vocab_size: Number of rows of the embedding table (unused when
            ``PATH`` supplies the weights).
        dw: Embedding size, which is also the number of convolution input
            channels.
        dh: Number of convolution filters (size of the pooled feature vector).
        L: Number of output classes.
        dropout: Dropout probability applied to the pooled features.
        PATH: Optional pre-trained embedding weights, given either as a 2-D
            float tensor or as a path that ``torch.load`` resolves to one.

    Raises:
        ValueError: If the pre-trained weights are not of shape ``(*, dw)``.
    """

    def __init__(self, vocab_size, dw=300, dh=50, L=4, dropout=0.0, PATH=None):
        # Zero-argument super() cannot name the wrong class by mistake.
        super().__init__()
        self.dw, self.dh = dw, dh
        # The isinstance test comes first because truth-testing a multi-element
        # tensor raises.
        if isinstance(PATH, torch.Tensor) or PATH:
            # `torch.nn` has no `from_pretrained`; the constructor lives on
            # nn.Embedding and expects the weight tensor itself.
            weights = PATH if isinstance(PATH, torch.Tensor) else torch.load(PATH)
            if weights.dim() != 2 or weights.size(1) != dw:
                raise ValueError(
                    'pre-trained embeddings must have shape (vocab, %d), got %s'
                    % (dw, tuple(weights.shape))
                )
            self.embed = nn.Embedding.from_pretrained(weights, freeze=False, padding_idx=0)
        else:
            m = nn.Embedding(vocab_size, dw, padding_idx=0)
            nn.init.normal_(m.weight, mean=0, std=dw ** -0.5)
            # normal_ also overwrote the padding row, so zero it again.
            nn.init.constant_(m.weight[0], 0)
            self.embed = m
        # padding=1 with kernel_size=3 keeps one output position per token.
        self.conv = nn.Conv1d(dw, dh, kernel_size=3, stride=1, padding=1)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(dh, L, bias=True)
        self.softmax = nn.LogSoftmax(dim=1) # dim=-1 or 1

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, L)``.

        Args:
            x: LongTensor of word ids (not one-hot vectors), shape
                ``(batch, seq_len)`` with ``seq_len >= 1``.
        """
        x = self.embed(x)                # (batch, seq_len, dw)
        x = x.transpose(1, 2)            # Conv1d convolves over the last axis
        x = torch.relu(self.conv(x))     # (batch, dh, seq_len)
        x = torch.max(x, dim=2)[0]       # max over time -> (batch, dh)
        x = self.dropout(x)
        x = self.linear(x)
        x = self.softmax(x)
        return x

    def update_from_word2vec(self, w2v, transformer):
        """Overwrite embedding rows with vectors from *w2v* where available.

        Args:
            w2v: Container supporting ``word in w2v`` and ``w2v[word]``, the
                latter returning a NumPy vector of length ``dw``.
            transformer: Mapping from word to embedding row index.
        """
        with torch.no_grad():
            for word, idx in transformer.items():
                if word in w2v:
                    self.embed.weight[idx].copy_(torch.from_numpy(w2v[word]))

# 87. 確率的勾配降下法によるCNNの学習

# 88. パラメータチューニング

# 89. 事前学習済み言語モデルからの転移学習