In [1]:
import math
import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset
from torchtext import data
from torchtext.legacy import data
from torchtext.data.utils import get_tokenizer
import pandas as pd
import time
import datetime
import copy
import gc

In [None]:
# データの前処理（Word Embedding）

# テキストを単語で分割
tokenizer = get_tokenizer('basic_english')

# data field定義
TEXT_N  = data.Field(sequential=True,
                     lower=True,
                     batch_first=True, 
                     tokenize=tokenizer,
                     init_token='<cls>')
#SECTION = data.Field(sequential=False, use_vocab=False)
#TREND_N = data.Field(sequential=False, use_vocab=False)
#PRICE_N = data.Field(sequential=False, use_vocab=False)
LABEL = data.Field(sequential=False, use_vocab=False)


# CSVファイルを読み込み、TabularDatasetオブジェクトの作成
train_data, test_data = data.TabularDataset.splits(path ='tweet-transformer/1h',
                                                   train='test7_v4.csv',
                                                   test ='test7_v4.csv',
                                                   format='csv',
                                                   skip_header = True,
                                                   fields=[('tweet_n', TEXT_N),
                                                           #('section', SECTION),
                                                           #('trend_n', TREND_N),
                                                           #('price_n', PRICE_N),
                                                           ('label', LABEL)])
print("データ読み込み完了")

# 単語辞書の作成
TEXT_N.build_vocab(train_data, min_freq=2)
vocab = TEXT_N.vocab
print('辞書作成完了')

# テキストを数値ベクトル化、バッチに分割
batch_size = 128
train_iter, test_iter = data.BucketIterator.splits((train_data, test_data),
                                                   batch_sizes=(batch_size, batch_size),
                                                   shuffle=True)

print(len(train_data))
print(len(train_iter))

# メモリ解放
del train_data,test_data
gc.collect()
print('メモリ解放')

In [2]:
# parametator for Net
#ntokens = len(vocab)  # size of vocabulary
ntokens = 3000  # size of vocabulary
d_model = 512  # embedding dimension
nhead   = 8    # number of heads in nn.MultiheadAttention
d_hid   = 2048  # dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 6    # number of nn.TransformerEncoderLayer in nn.TransformerEncoder
dropout = 0.2  # dropout probability

In [3]:
# Transformerモデルの概要
class Net(nn.Module):

    def __init__(self,
                 ntoken: int,
                 d_model: int,
                 nhead: int,
                 d_hid: int,
                 nlayers: int,
                 dropout: float = 0.5):

        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model
        self.emb = nn.Embedding(ntoken, d_model, padding_idx=0)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.classifer = nn.Linear(d_model, 3)
        #self.softmax = nn.Softmax()

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.emb.weight.data.uniform_(-initrange, initrange)
        self.classifer.bias.data.zero_()
        self.classifer.weight.data.uniform_(-initrange, initrange)

    #データの流れ
    #def forward(self, src: Tensor, src_mask: Tensor) -> Tensor:
    def forward(self, src: Tensor) -> Tensor:
        '''
        Args:
            src: Tensor, shape [batch_size, seq_len]
            src_mask: Tensor, shape [seq_len, seq_len]

        Returns:
            output Tensor of shape [batch_size, nclass:3]
        '''

        embedded = self.emb(src) * math.sqrt(self.d_model)
        pos = self.pos_encoder(embedded)
        encoder_out = self.transformer_encoder(pos)
        x = encoder_out.mean(dim=1)
        output = self.classifer(x)
        #output = self.softmax(output)
        return output
        

In [4]:
# PositionalEncodingの概要
class PositionalEncoding(nn.Module):

    def __init__(self,
                 d_model: int,
                 dropout: float = 0.1,
                 max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        '''
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]
        '''
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

In [34]:
# paramator for training & evaluation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(ntokens, d_model, nhead, d_hid, nlayers, dropout).to(device)
lr = 1e-3
softmax = nn.Softmax(dim=1)
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
torch.manual_seed(0)

<torch._C.Generator at 0x1accd2ff710>

In [35]:
# training
def train(model: nn.Module, train_iter: Tensor):
    train_start_time = time.time()
    model.train()
    num_batches = len(train_iter)
    log_interval = math.ceil(num_batches/100)*10
    batch_counter = 0
    train_loss = 0
    train_correct = 0
    train_count = 0
    
    for idx, batch in enumerate(iter(train_iter)):
        predictions = model(batch.tweet_n.to(device))
        prob = softmax(predictions)
        labels = batch.label.to(device)

        loss = criterion(predictions, labels)
        
        '''
        print('softmax')
        print(prob)
        print('予測結果')
        print(prob.argmax(axis=1))
        print('答え')
        print(labels)
        '''
        
        correct = prob.argmax(axis=1) == labels
        acc = correct.sum().item() / correct.size(0)

        train_correct += correct.sum().item()
        train_count += correct.size(0)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        
        batch_counter += 1
        
        if batch_counter % log_interval == 0 or batch_counter == num_batches:
            lr = scheduler.get_last_lr()[0]
            s_per_batch = (time.time() - train_start_time) / log_interval
            cur_loss = train_loss / log_interval
            cur_acc = train_correct / train_count
            print(f'| epoch {epoch:3d} | {batch_counter:5d}/{num_batches:5d} batches | '
                  f'lr {lr:1.5f} | s/batch {s_per_batch:5.2f} | '
                  f'loss {cur_loss:5.2f} | accuracy {cur_acc:8.2f}')
            total_loss = 0
            start_time = time.time()

In [36]:
# evaluation (val, test)
def evaluate(model: nn.Module, eval_iter: Tensor):
    model.eval()
    eval_loss = 0
    eval_correct = 0
    eval_count = 0

    with torch.no_grad():
        for idx, batch in enumerate(iter(eval_iter)):
            predictions = model(batch.tweet_n.to(device))
            prob = softmax(predictions)
            labels = batch.label.to(device)

            loss = criterion(predictions, labels)

            correct = prob.argmax(axis=1) == labels
            acc = correct.sum().item() / correct.size(0)

            eval_correct += correct.sum().item()
            eval_count += correct.size(0)
            eval_loss += loss.item()
        
    print(f'| loss {eval_loss}| accuracy {eval_correct / ecal_count} ')
        
    return eval_loss, eval_correct / eval_count

In [37]:
# main
# training roop
best_val_loss = float('inf')
epochs = 50
best_model = None

dt_start = datetime.datetime.now()
print(datetime.datetime.now())
print('学習開始')
print('-' * 95)

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(model, train_iter)
#    val_loss, val_acc = evaluate(model, val_iter)
    elapsed = time.time() - epoch_start_time
    print('-' * 95)
    print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | ')
#          f'valid loss {val_loss:5.2f} | valid accuracy {val_acc:8.2f}')
    print('-' * 95)

#    if val_loss < best_val_loss:
#        best_val_loss = val_loss
#        best_model = copy.deepcopy(model)

    scheduler.step()
 
dt_end = datetime.datetime.now()
print(datetime.datetime.now())    
print(f'経過時間：{dt_end - dt_start}')
print('学習終了')


2021-10-25 13:57:51.487780
学習開始
-----------------------------------------------------------------------------------------------
| epoch   1 |     8/    8 batches | lr 0.00100 | s/batch  0.26 | loss  3.15 | accuracy     0.38
-----------------------------------------------------------------------------------------------
| end of epoch   1 | time:  2.57s | 
-----------------------------------------------------------------------------------------------
| epoch   2 |     8/    8 batches | lr 0.00095 | s/batch  0.24 | loss  1.19 | accuracy     0.35
-----------------------------------------------------------------------------------------------
| end of epoch   2 | time:  2.45s | 
-----------------------------------------------------------------------------------------------
| epoch   3 |     8/    8 batches | lr 0.00090 | s/batch  0.23 | loss  0.94 | accuracy     0.41
-----------------------------------------------------------------------------------------------
| end of epoch   3 | time:  2.

In [None]:
# main
# test
test_loss, test_acc = evaluate(best_model, test_iter)

print('=' * 89)
print(f'| End of training | test loss {test_loss:5.2f} | '
      f'test accuracy {test_acc:8.2f}')
print('=' * 89)

In [45]:
# Transformerモデルの概要
class Net(nn.Module):

    def __init__(self):
        super().__init__()

    #データの流れ
    def forward(self, src) -> Tensor:
        x=src[0]
        output = x.mean(dim=1)
        return output
        

In [46]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)

In [47]:
x = torch.tensor([[1,1,1,2],[1,1,1,3],[1,1,1,4],[1,1,1,5],[1,1,1,1]],dtype=torch.float)
l = [x,x,x]
print(x.ndim)
out = model(l)
print(out)

2
tensor([1.2500, 1.5000, 1.7500, 2.0000, 1.0000])
