In [1]:
#各種インポート
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
#データ読み込み
df = pd.read_csv('./newsCorpora_re.csv', header=None, sep='\t', names=['ID', 'TITLE', 'URL', 'PUBLISHER', 'CATEGORY', 'STORY', 'HOSTNAME', 'TIMESTAMP'])
#データを取り出す
df = df.loc[df['PUBLISHER'].isin(['Reuters', 'Huffington Post', 'Businessweek', 'Contactmusic.com', 'Daily Mail']), ['TITLE', 'CATEGORY']]

#データを分ける
train, valid_test = train_test_split(df, test_size=0.2, shuffle=True, random_state=123, stratify=df['CATEGORY'])
valid, test = train_test_split(valid_test, test_size=0.5, shuffle=True, random_state=123, stratify=valid_test['CATEGORY'])

#インデックスを振り直す
train.reset_index(drop=True, inplace=True)
valid.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)

In [3]:
from collections import defaultdict
import string

In [4]:
d = defaultdict(int)
table = str.maketrans(string.punctuation, ' '*len(string.punctuation))

#出現回数をカウント
for text in train['TITLE']:
    for word in text.translate(table).split():
        d[word] += 1


d = sorted(d.items(), key=(lambda x : x[1]), reverse=True)
#辞書を確認
print(d)



In [5]:
#単語とIDの対応の辞書
word2id = {word : i + 1 for i, (word, cnt) in enumerate(d) if cnt > 1 }

#確認
print(word2id)



In [6]:
def tokenizer(text, word2id=word2id, unk=0):
    table = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    return [word2id.get(word, unk) for word in text.translate(table).split()]

In [7]:
import torch
from torch.utils.data import Dataset

In [8]:
class CreateDataset(Dataset):
    def __init__(self, X, y, tokenizer):
        self.X = X
        self.y = y
        self.tokenizer = tokenizer
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        text = self.X[index]
        inputs = self.tokenizer(text)
        
        return torch.tensor(inputs, dtype=torch.int64), torch.tensor(self.y[index], dtype=torch.int64)

In [9]:
#カテゴリーと数字の対応
category_dict = {'b' : 0, 't' : 1, 'e' : 2, 'm' : 3}

#カテゴリーを数字に変換する
y_train = train['CATEGORY'].map(lambda x : category_dict[x]).values
y_valid = valid['CATEGORY'].map(lambda x : category_dict[x]).values
y_test = test['CATEGORY'].map(lambda x : category_dict[x]).values

In [10]:
train_ds = CreateDataset(train['TITLE'], y_train, tokenizer)
valid_ds = CreateDataset(valid['TITLE'], y_valid, tokenizer)
test_ds = CreateDataset(test['TITLE'], y_test, tokenizer)

print('確認(訓練データ)')
print('len(train_ds) : {}'.format(len(train_ds)))
for i in range(5):
    print('単語ID : {}, カテゴリー : {}'.format(train_ds[i][0], train_ds[i][1]))

print('確認(検証データ)')
print('len(valid_ds) : {}'.format(len(valid_ds)))
for i in range(5):
    print('単語ID : {}, カテゴリー : {}'.format(valid_ds[i][0], valid_ds[i][1]))
    
print('確認(評価データ)')
print('len(test_ds) : {}'.format(len(test_ds)))
for i in range(5):
    print('単語ID : {}, カテゴリー : {}'.format(test_ds[i][0], test_ds[i][1]))

確認(訓練データ)
len(train_ds) : 10684
単語ID : tensor([ 229,    5,   11,  172,  786,  114,   31,    8, 6667,  129,    6, 3527,
        5175]), カテゴリー : 0
単語ID : tensor([ 169,  539,    1,  683, 1237,   82,  279, 1898, 4199]), カテゴリー : 1
単語ID : tensor([ 540,  321,  236,    0,   16, 3528,    0, 1238, 6668, 4200, 2664, 2335]), カテゴリー : 3
単語ID : tensor([ 135,   32,  684, 3529, 1073,  936,   25,  170,  197,   35,    9,  268,
         614]), カテゴリー : 2
単語ID : tensor([4201, 6669, 1899,   22, 1350, 3530,   82, 4202]), カテゴリー : 0
確認(検証データ)
len(valid_ds) : 1336
単語ID : tensor([   5,   13,    7, 5186,  323,    0,   89, 1950,  759,  302,   71]), カテゴリー : 1
単語ID : tensor([ 157,   66,  241,   66,  241,    0,   12, 1360, 1492]), カテゴリー : 2
単語ID : tensor([1348, 1450,  532,  533,   75,  117,    0,  202,    6, 1453, 1014,  244,
           8,  999]), カテゴリー : 0
単語ID : tensor([   5,   13,  425, 6463,    8,    0, 3859,   26,    0,    0,    0]), カテゴリー : 0
単語ID : tensor([ 371,  994, 2844, 3077,  422,    8,  233,    2, 1303]),

In [11]:
from torch import nn

In [12]:
class RNN(nn.Module):
    def __init__(self, vocab_size, emb_size, padding_idx, output_size, hidden_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(vocab_size, emb_size, padding_idx=padding_idx)
        self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh',batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        self.batch_size = x.size()[0]
        emb = self.emb(x)
        hidden = self.init_hidden()
        out, hidden = self.rnn(emb, hidden)
        out = self.fc(out[:,-1,:])
        return out
    
    def init_hidden(self):
        return torch.zeros(1, self.batch_size, self.hidden_size)

In [13]:
# パラメータの設定
VOCAB_SIZE = len(word2id.values()) + 1
EMB_SIZE = 300
PADDING_IDX = len(word2id.values())
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50


# モデルの定義
model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE)

In [14]:
#10件分の予測

for i in range(10):
    X = train_ds[i][0]
    print(torch.softmax(model(X.unsqueeze(0)),dim=-1))

tensor([[0.1156, 0.3781, 0.1575, 0.3489]], grad_fn=<SoftmaxBackward>)
tensor([[0.2675, 0.3933, 0.1043, 0.2349]], grad_fn=<SoftmaxBackward>)
tensor([[0.2659, 0.2665, 0.2297, 0.2379]], grad_fn=<SoftmaxBackward>)
tensor([[0.2476, 0.1675, 0.1393, 0.4456]], grad_fn=<SoftmaxBackward>)
tensor([[0.3880, 0.2386, 0.1751, 0.1983]], grad_fn=<SoftmaxBackward>)
tensor([[0.3148, 0.2382, 0.1552, 0.2918]], grad_fn=<SoftmaxBackward>)
tensor([[0.3238, 0.3256, 0.1935, 0.1571]], grad_fn=<SoftmaxBackward>)
tensor([[0.2588, 0.2280, 0.2063, 0.3069]], grad_fn=<SoftmaxBackward>)
tensor([[0.1691, 0.1947, 0.4126, 0.2236]], grad_fn=<SoftmaxBackward>)
tensor([[0.2274, 0.2478, 0.2144, 0.3104]], grad_fn=<SoftmaxBackward>)


In [15]:
class RNN(nn.Module):
    def __init__(self, vocab_size, emb_size, padding_idx, output_size, hidden_size, device):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(vocab_size, emb_size, padding_idx=padding_idx)
        self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh',batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        self.batch_size = x.size()[0]
        emb = self.emb(x)
        hidden = self.init_hidden()
        out, hidden = self.rnn(emb.to(device), hidden.to(device))
        out = self.fc(out[:,-1,:])
        return out
    
    def init_hidden(self):
        return torch.zeros(1, self.batch_size, self.hidden_size)

In [16]:
import time
from torch.utils.data import DataLoader
from torch import optim

In [17]:
def calc_loss_accuracy(net, dataset, criterion, device):
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
    
    loss = 0.0
    total = 0
    correct = 0
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            #inputsとlabelsをdeviceに送る
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = net(inputs)
            loss += criterion(outputs, labels).item()
            pred = torch.argmax(outputs, dim=-1)
            total += len(inputs)
            correct += (pred == labels).sum().item()
            
    return loss / len(dataloader), correct/total

In [18]:
def train_model(net, train_ds, valid_ds, criterion, optimizer, batch_size, num_epochs, device=None):
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    valid_dl = DataLoader(valid_ds, batch_size=len(valid_ds), shuffle=False)
    
    log_train = []
    log_valid = []
    #モデルをdeviceに送る
    net.to(device)
    
    for epoch in range(num_epochs):
        net.train()
        start_time = time.time()
        
        for inputs, labels in train_dl:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
         
        net.eval()
            
        loss_train, acc_train = calc_loss_accuracy(net, train_ds, criterion, device)
        loss_valid, acc_valid = calc_loss_accuracy(net, valid_ds, criterion, device)
    
        log_train.append([loss_train, acc_train])
        log_valid.append([loss_valid, acc_valid])
    
        torch.save({'epoch': epoch, 'model_state_dict': net.state_dict(), 'optimizer_state_dict': optimizer.state_dict()}, f'checkpoint{epoch + 1}.pt')
        
        end_time = time.time()
        
        print('Epoch {}/{} | loss_train : {:.4f} | acc_train : {:.4f} | loss_valid : {:.4f} | acc_valid : {:.4f} | time : {:.4f}'.format(epoch+1, num_epochs, loss_train, acc_train, loss_valid, acc_valid, (end_time - start_time)))

    return {'train' : log_train, 'valid' : log_valid}   

In [19]:
VOCAB_SIZE = len(set(word2id.values())) + 1  
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('使用デバイス: ',device)
print('---start---')

# モデルの定義
model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE, device)

criterion = nn.CrossEntropyLoss()

#optimizerを作る
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

num_epochs = 1
batch_size = 1


log = train_model(model, train_ds, valid_ds, criterion, optimizer, batch_size, num_epochs, device=device)

使用デバイス:  cuda:0
---start---
Epoch 1/1 | loss_train : 3.0408 | acc_train : 0.4113 | loss_valid : 3.1503 | acc_valid : 0.3997 | time : 32.7988


In [20]:
class RNN(nn.Module):
  def __init__(self, vocab_size, emb_size, padding_idx, output_size, hidden_size, num_layers=1, emb_weights=None, bidirectional=False, device=None):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.num_directions = bidirectional + 1
    if emb_weights is None:
        self.emb = nn.Embedding(vocab_size, emb_size, padding_idx=padding_idx)
    else:
        self.emb = nn.Embedding.from_pretrained(emb_weights)
    self.rnn = nn.RNN(emb_size, hidden_size, num_layers, nonlinearity='tanh', bidirectional=bidirectional, batch_first=True)
    self.fc = nn.Linear(hidden_size * self.num_directions, output_size)

  def forward(self, x):
    self.batch_size = x.size()[0]
    
    emb = self.emb(x)
    hidden = self.init_hidden()

    out, hidden = self.rnn(emb.to(device), hidden.to(device))
    out = self.fc(out[:, -1, :])
    return out

  def init_hidden(self):
    hidden = torch.zeros(self.num_layers * self.num_directions, self.batch_size, self.hidden_size)
    return hidden

In [21]:
def train_model(net, train_ds, valid_ds, criterion, optimizer, batch_size, num_epochs, collate_fn=None, device=None):
    if collate_fn is None:
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    else:
        train_dl = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
    valid_dl = DataLoader(valid_ds, batch_size=len(valid_ds), shuffle=False)
    
    log_train = []
    log_valid = []
    #モデルをdeviceに送る
    net.to(device)
    
    for epoch in range(num_epochs):
        net.train()
        start_time = time.time()
        
        for inputs, labels in train_dl:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
        net.eval()
            
            
        loss_train, acc_train = calc_loss_accuracy(net, train_ds, criterion, device)
        loss_valid, acc_valid = calc_loss_accuracy(net, valid_ds, criterion, device)
    
        log_train.append([loss_train, acc_train])
        log_valid.append([loss_valid, acc_valid])
    
        torch.save({'epoch': epoch, 'model_state_dict': net.state_dict(), 'optimizer_state_dict': optimizer.state_dict()}, f'checkpoint{epoch + 1}.pt')
        
        end_time = time.time()
        
        print('Epoch {}/{} | loss_train : {:.4f} | acc_train : {:.4f} | loss_valid : {:.4f} | acc_valid : {:.4f} | time : {:.4f}'.format(epoch+1, num_epochs, loss_train, acc_train, loss_valid, acc_valid, (end_time - start_time)))

    return {'train' : log_train, 'valid' : log_valid}   

In [22]:
class collate_fn():
    def __init__(self, padding_idx):
        self.padding_idx = padding_idx
        
    def __call__(self, batch):
        sorted_batch = sorted(batch, key=lambda x : x[0].shape, reverse=True)
        sequences = [x[0].squeeze(0) for x in sorted_batch]
        sequences_padded = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True, padding_value=0)
        labels = torch.LongTensor([x[1] for x in sorted_batch])
        return sequences_padded, labels

In [23]:
# パラメータの設定
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
NUM_LAYERS = 1
EMB_WEIGHTS = None
BIDIRECTIONAL = False
LEARNING_RATE = 5e-2
BATCH_SIZE = 32
NUM_EPOCHS = 10

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('使用デバイス: ',device)
print('---start---')

# モデルの定義
model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, emb_weights= EMB_WEIGHTS, bidirectional=BIDIRECTIONAL, device=device)

# 損失関数の定義
criterion = nn.CrossEntropyLoss()

# オプティマイザの定義
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)



# モデルの学習
log = train_model(model, train_ds, valid_ds, criterion, optimizer, BATCH_SIZE, NUM_EPOCHS, collate_fn=collate_fn(PADDING_IDX), device=device)

使用デバイス:  cuda:0
---start---
Epoch 1/10 | loss_train : 1.3718 | acc_train : 0.3490 | loss_valid : 1.3406 | acc_valid : 0.3615 | time : 10.2565
Epoch 2/10 | loss_train : 1.3139 | acc_train : 0.3903 | loss_valid : 1.2935 | acc_valid : 0.3855 | time : 10.3508
Epoch 3/10 | loss_train : 1.2951 | acc_train : 0.4108 | loss_valid : 1.2792 | acc_valid : 0.4154 | time : 10.3259
Epoch 4/10 | loss_train : 1.2892 | acc_train : 0.4252 | loss_valid : 1.2819 | acc_valid : 0.4266 | time : 10.2097
Epoch 5/10 | loss_train : 1.2720 | acc_train : 0.4360 | loss_valid : 1.2706 | acc_valid : 0.4274 | time : 9.5594
Epoch 6/10 | loss_train : 1.2657 | acc_train : 0.4430 | loss_valid : 1.2643 | acc_valid : 0.4304 | time : 9.5576
Epoch 7/10 | loss_train : 1.2542 | acc_train : 0.4618 | loss_valid : 1.2620 | acc_valid : 0.4461 | time : 9.5380
Epoch 8/10 | loss_train : 1.2489 | acc_train : 0.4779 | loss_valid : 1.2689 | acc_valid : 0.4633 | time : 10.1632
Epoch 9/10 | loss_train : 1.2089 | acc_train : 0.5049 | loss_va

In [24]:
from gensim.models import KeyedVectors
import numpy as np

In [25]:
# 学習済みモデルのロード
model = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin.gz', binary=True)

# 学習済み単語ベクトルの取得
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
weights = np.zeros((VOCAB_SIZE, EMB_SIZE))
words_in_pretrained = 0
for i, word in enumerate(word2id.keys()):
  try:
    weights[i] = model[word]
    words_in_pretrained += 1
  except KeyError:
    weights[i] = np.random.normal(scale=0.4, size=(EMB_SIZE,))
weights = torch.from_numpy(weights.astype((np.float32)))

print(f'学習済みベクトル利用単語数: {words_in_pretrained} / {VOCAB_SIZE}')
print(weights.size())

学習済みベクトル利用単語数: 9174 / 9406
torch.Size([9406, 300])


In [26]:
# パラメータの設定
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
NUM_LAYERS = 1
BIDIRECTIONAL = False
LEARNING_RATE = 5e-2
BATCH_SIZE = 32
NUM_EPOCHS = 10

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('使用デバイス: ',device)
print('---start---')

# モデルの定義
model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, emb_weights=weights, bidirectional=BIDIRECTIONAL, device=device)

# 損失関数の定義
criterion = nn.CrossEntropyLoss()

# オプティマイザの定義
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)



# モデルの学習
log = train_model(model, train_ds, valid_ds, criterion, optimizer, BATCH_SIZE, NUM_EPOCHS, collate_fn=collate_fn(PADDING_IDX), device=device)

使用デバイス:  cuda:0
---start---
Epoch 1/10 | loss_train : 1.2730 | acc_train : 0.3984 | loss_valid : 1.2628 | acc_valid : 0.3922 | time : 10.2278
Epoch 2/10 | loss_train : 1.2606 | acc_train : 0.3973 | loss_valid : 1.2481 | acc_valid : 0.3877 | time : 10.2567
Epoch 3/10 | loss_train : 1.4144 | acc_train : 0.4088 | loss_valid : 1.3938 | acc_valid : 0.4139 | time : 10.2256
Epoch 4/10 | loss_train : 1.1930 | acc_train : 0.4124 | loss_valid : 1.1877 | acc_valid : 0.4019 | time : 10.2624
Epoch 5/10 | loss_train : 1.2301 | acc_train : 0.3943 | loss_valid : 1.2293 | acc_valid : 0.3877 | time : 10.2654
Epoch 6/10 | loss_train : 1.3151 | acc_train : 0.3964 | loss_valid : 1.3098 | acc_valid : 0.3960 | time : 10.2820
Epoch 7/10 | loss_train : 1.2299 | acc_train : 0.3963 | loss_valid : 1.2296 | acc_valid : 0.3937 | time : 10.2760
Epoch 8/10 | loss_train : 1.2546 | acc_train : 0.3965 | loss_valid : 1.2524 | acc_valid : 0.3900 | time : 10.2475
Epoch 9/10 | loss_train : 1.2731 | acc_train : 0.3961 | loss

In [27]:
# パラメータの設定
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
NUM_LAYERS = 2
BIDIRECTIONAL = True
LEARNING_RATE = 5e-2
BATCH_SIZE = 32
NUM_EPOCHS = 10

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('使用デバイス: ',device)
print('---start---')

# モデルの定義
model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, emb_weights=weights, bidirectional=BIDIRECTIONAL, device=device)

# 損失関数の定義
criterion = nn.CrossEntropyLoss()

# オプティマイザの定義
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)


# モデルの学習
log = train_model(model, train_ds, valid_ds, criterion, optimizer, BATCH_SIZE, NUM_EPOCHS, collate_fn=collate_fn(PADDING_IDX), device=device)

使用デバイス:  cuda:0
---start---
Epoch 1/10 | loss_train : 1.2566 | acc_train : 0.3916 | loss_valid : 1.2514 | acc_valid : 0.3877 | time : 16.2221
Epoch 2/10 | loss_train : 1.2526 | acc_train : 0.3963 | loss_valid : 1.2493 | acc_valid : 0.3952 | time : 15.4210
Epoch 3/10 | loss_train : 1.2297 | acc_train : 0.3964 | loss_valid : 1.2301 | acc_valid : 0.3945 | time : 15.2912
Epoch 4/10 | loss_train : 1.2841 | acc_train : 0.3972 | loss_valid : 1.2789 | acc_valid : 0.3967 | time : 15.2868
Epoch 5/10 | loss_train : 1.2393 | acc_train : 0.3892 | loss_valid : 1.2362 | acc_valid : 0.3885 | time : 15.3430
Epoch 6/10 | loss_train : 1.2853 | acc_train : 0.3963 | loss_valid : 1.2791 | acc_valid : 0.3945 | time : 15.3567
Epoch 7/10 | loss_train : 1.0834 | acc_train : 0.5845 | loss_valid : 1.0756 | acc_valid : 0.5868 | time : 15.3629
Epoch 8/10 | loss_train : 1.1129 | acc_train : 0.6151 | loss_valid : 1.1426 | acc_valid : 0.5868 | time : 15.3917
Epoch 9/10 | loss_train : 1.0250 | acc_train : 0.6171 | loss

In [28]:
from torch.nn import functional as F

In [40]:
class CNN(nn.Module):
    def __init__(self, vocab_size, emb_size, padding_idx, output_size, out_channels, kernel_heights, stride, padding, emb_weights=None):
        super().__init__()
        if emb_weights is None:
            self.emb = nn.Embedding(vocab_size, emb_size, padding_idx=padding_idx)
        else:
            self.emb = nn.Embedding.from_pretrained(emb_weights)
        self.conv = nn.Conv2d(1, out_channels, (kernel_heights, emb_size), stride, (padding, 0))
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(out_channels, output_size)
        
    def forward(self, x):
        emb = self.emb(x).unsqueeze(1)
        conv = self.conv(emb)
        act = F.relu(conv.squeeze(3))
        max_pool = F.max_pool1d(act, act.size()[2])
        out = self.fc(self.dropout(max_pool.squeeze(2)))
        return out

In [41]:
# パラメータの設定
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
OUT_CHANNELS = 100
KERNEL_HEIGHTS = 3
STRIDE = 1
PADDING = 1

# モデルの定義
model = CNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, OUT_CHANNELS, KERNEL_HEIGHTS, STRIDE, PADDING, emb_weights=weights)

In [42]:
for i in range(10):
    X = train_ds[i][0]
    print(torch.softmax(model(X.unsqueeze(0)), dim=-1))

tensor([[0.3010, 0.2337, 0.2594, 0.2059]], grad_fn=<SoftmaxBackward>)
tensor([[0.2692, 0.2649, 0.2577, 0.2083]], grad_fn=<SoftmaxBackward>)
tensor([[0.2856, 0.2277, 0.2558, 0.2309]], grad_fn=<SoftmaxBackward>)
tensor([[0.2739, 0.2549, 0.2586, 0.2126]], grad_fn=<SoftmaxBackward>)
tensor([[0.2669, 0.2679, 0.2332, 0.2320]], grad_fn=<SoftmaxBackward>)
tensor([[0.3145, 0.2504, 0.2185, 0.2167]], grad_fn=<SoftmaxBackward>)
tensor([[0.2770, 0.2491, 0.2433, 0.2306]], grad_fn=<SoftmaxBackward>)
tensor([[0.2578, 0.2818, 0.2268, 0.2335]], grad_fn=<SoftmaxBackward>)
tensor([[0.2768, 0.2822, 0.2531, 0.1878]], grad_fn=<SoftmaxBackward>)
tensor([[0.2837, 0.2486, 0.2410, 0.2266]], grad_fn=<SoftmaxBackward>)


In [44]:
VOCAB_SIZE = len(set(word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(word2id.values()))
OUTPUT_SIZE = 4
OUT_CHANNELS = 100
KERNEL_HEIGHTS = 3
STRIDE = 1
PADDING = 1
LEARNING_RATE = 5e-2
BATCH_SIZE = 64
NUM_EPOCHS = 10

# モデルの定義
model = CNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, OUT_CHANNELS, KERNEL_HEIGHTS, STRIDE, PADDING, emb_weights=weights)

# 損失関数の定義
criterion = nn.CrossEntropyLoss()

# オプティマイザの定義
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

# デバイスの指定
device = torch.device('cuda')

# モデルの学習
log = train_model(model, train_ds, valid_ds,criterion, optimizer, BATCH_SIZE, NUM_EPOCHS, collate_fn=collate_fn(PADDING_IDX), device=device)

Epoch 1/10 | loss_train : 1.1486 | acc_train : 0.4772 | loss_valid : 1.1501 | acc_valid : 0.4843 | time : 8.1657
Epoch 2/10 | loss_train : 1.0480 | acc_train : 0.6396 | loss_valid : 1.0665 | acc_valid : 0.6310 | time : 8.1127
Epoch 3/10 | loss_train : 0.9385 | acc_train : 0.6990 | loss_valid : 0.9699 | acc_valid : 0.6834 | time : 8.1369
Epoch 4/10 | loss_train : 0.8749 | acc_train : 0.6987 | loss_valid : 0.9226 | acc_valid : 0.6751 | time : 8.1396
Epoch 5/10 | loss_train : 0.7981 | acc_train : 0.7270 | loss_valid : 0.8606 | acc_valid : 0.7013 | time : 8.1190
Epoch 6/10 | loss_train : 0.7518 | acc_train : 0.7441 | loss_valid : 0.8302 | acc_valid : 0.7133 | time : 8.1243
Epoch 7/10 | loss_train : 0.6976 | acc_train : 0.7573 | loss_valid : 0.7978 | acc_valid : 0.7208 | time : 8.1286
Epoch 8/10 | loss_train : 0.6517 | acc_train : 0.7712 | loss_valid : 0.7673 | acc_valid : 0.7290 | time : 8.1359
Epoch 9/10 | loss_train : 0.6135 | acc_train : 0.7825 | loss_valid : 0.7502 | acc_valid : 0.7313

In [45]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TransformerModel(nn.Module):

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights()

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask):
        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        output = self.decoder(output)
        return output

ImportError: cannot import name 'TransformerEncoder' from 'torch.nn' (/usr/local/lib/python3.7/site-packages/torch/nn/__init__.py)

In [46]:
import sys
print(sys.path)

['/export/home/yuka/hyappon/sec9', '/usr/local/DL-Box/digits-2.0/caffe/python', '/usr/local/lib/python37.zip', '/usr/local/lib/python3.7', '/usr/local/lib/python3.7/lib-dynload', '', '/usr/local/lib/python3.7/site-packages', '/usr/local/lib/python3.7/site-packages/IPython/extensions', '/export/home/yuka/.ipython']
