In [1]:
import math
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import Dataset, DataLoader
from torchtext import data
from torchtext.legacy import data
from torchtext.data.utils import get_tokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from tqdm import tqdm
import pandas as pd
import itertools
import datetime
import shutil
import pickle
import random
import time
import copy
import sys
import gc
import os

In [2]:
# pickle書き込み
def write_pickle(filepath, data):
    start_time = time.time()
    print(f'writing pickle to "{filepath}" ...')    
    
    with open(filepath, 'wb') as p:
        pickle.dump(data,p)
    
    print(f'end of writeing {time.time()-start_time:6.2f} s')
    
    del start_time
    gc.collect()

In [3]:
# pickle書き込み
# ログなしVer
def write_pickle_quickly(filepath, data):
    with open(filepath, 'wb') as p:
        pickle.dump(data,p)

In [4]:
# pickle読み出し
def read_pickle(filepath):
    start_time = time.time()
    print(f'reading pickle from "{filepath}" ...')
    
    with open(filepath, 'rb') as p:
        data = pickle.load(p)
    
    print(f'end of reading {time.time()-start_time:6.2f} s')
    
    del start_time
    gc.collect()
    
    return data

In [5]:
# pickle読み出し
# ログなしVer
def read_pickle_quickly(filepath):
    with open(filepath, 'rb') as p:
        data = pickle.load(p)
    return data

In [6]:
# vacab作成
# テキストを単語で分割

v_start = time.time()
print("Reading...")
vocab = read_pickle('../../external_drive/pickle/vocab.pickle')
print('Finish!!')
print(f'{time.time() - v_start:5.2f} s')
del v_start
gc.collect()

'''
v_start = time.time()
tokenizer = get_tokenizer('basic_english')

# data field定義
TEXT  = data.Field(sequential=True,
                     lower=True,
                     batch_first=True, 
                     tokenize=tokenizer,
                     init_token='<cls>')

# CSVファイルを読み込み、TabularDatasetオブジェクトの作成
print("Reading...")
vocab_data = data.TabularDataset(path ='tweet-transformer/1d/2021-17_t.csv',
                                       format='csv',
                                       skip_header = True,
                                       fields=[('tweet', TEXT)])

# 単語辞書の作成
print("Creating vocab...")
TEXT.build_vocab(vocab_data, min_freq=3)
vocab = TEXT.vocab
print(f'{len(vocab)=}')

print('Finish!!')
print(f'{time.time() - v_start:5.2f} s')

# メモリ開放
del v_start, vocab_data, tokenizer, TEXT
gc.collect()
'''

Reading...
reading pickle from "../../external_drive/pickle/vocab.pickle" ...
end of reading   1.98 s
Finish!!
 2.06 s


'\nv_start = time.time()\ntokenizer = get_tokenizer(\'basic_english\')\n\n# data field定義\nTEXT  = data.Field(sequential=True,\n                     lower=True,\n                     batch_first=True, \n                     tokenize=tokenizer,\n                     init_token=\'<cls>\')\n\n# CSVファイルを読み込み、TabularDatasetオブジェクトの作成\nprint("Reading...")\nvocab_data = data.TabularDataset(path =\'tweet-transformer/1d/2021-17_t.csv\',\n                                       format=\'csv\',\n                                       skip_header = True,\n                                       fields=[(\'tweet\', TEXT)])\n\n# 単語辞書の作成\nprint("Creating vocab...")\nTEXT.build_vocab(vocab_data, min_freq=3)\nvocab = TEXT.vocab\nprint(f\'{len(vocab)=}\')\n\nprint(\'Finish!!\')\nprint(f\'{time.time() - v_start:5.2f} s\')\n\n# メモリ開放\ndel v_start, vocab_data, tokenizer, TEXT\ngc.collect()\n'

In [7]:
# Dataset1の定義
# args　：tdf['ids'], tdf['mask']
# return：dataset{ids,mask}

class CreateDataset1(Dataset):
    def __init__(self, x, y):
        self.x = x # tdf['ids']
        self.y = y # tdf['mask']
        
    # len(Dataset)で返す値を指定
    def __len__(self):
        return len(self.x)

    # Dataset[index]で返す値を指定
    def __getitem__(self, index):
        ids  = self.x[index]
        mask = self.y[index]

        return {'ids'   : ids,
                'mask'  : mask}

In [8]:
# Dataset3の定義
# args　：tensor of section, tensor of price, tensor of trend(n+1)
# return：dataset3{section, src, target}
class CreateDataset3(Dataset):
    def __init__(self, x, y, z):
        self.x = x # tensor of section
        self.y = y # tensor of price
        self.z = z # tensor of trend(n+1)
        
    # len(Dataset)で返す値を指定
    def __len__(self):
        return len(self.x)

    # Dataset[index]で返す値を指定
    def __getitem__(self, index):
        section = self.x[index]
        src     = self.y[index]
        target  = self.z[index]

        return {'section': section,
                'src'    : src,
                'target' : target}

In [9]:
# tensor_df()内の関数
max_len = 128
tokenizer = get_tokenizer('basic_english')
def tokenize(text):
    return tokenizer(text)

def text_to_ids(tokenized_text):
    ids  = torch.tensor([vocab[word] for word in tokenized_text], dtype=torch.long).unsqueeze(0) # [1,seq_len]
    ids  = F.pad(ids, (0 ,max_len-len(tokenized_text)), "constant", 0) # [1,max_len]
    return ids

def ids_to_mask(ids):
    mask = (ids==0)
    return mask

In [10]:
# return df[section.ids,mask]
def tensor_df(df):
    start_time = time.time()
    
    print('1.text to ids...')
    df['tweet(n)'] = df['tweet(n)'].apply(tokenize)
    df['ids']      = df['tweet(n)'].apply(text_to_ids)
    print('2.ids to mask...')
    df['mask']     = df['ids'].apply(ids_to_mask)
    df = df.drop(columns=['tweet(n)'])
    
    print(f'end of df to tensor {time.time()-start_time:6.2f} s')
    
    return df

In [11]:
# return : df[section,ids(n),mask(n)]
# sectionごとにtensor連結
train_num_section_list = {'1d':181,'12h':363,'4h':1090,'1h':4361,'30m':8722,'15m':17444,'5m':52333}
test_num_section_list  = {'1d':31, '12h':61, '4h':182, '1h':727, '30m':1454,'15m':2908, '5m':8723}

def separate_section(df):
    num_section = train_num_section_list[timespan] + test_num_section_list[timespan]
    section_list = []
    ids_list  = []
    mask_list = []
    for i in range (0,num_section):
        #pandasのSectionkを抽出　idsとmaskをひとまとまりのtensorに
        section_list.append(i)
        df_ids = df[df['section'] == i]['ids']
        l = df_ids.values.tolist()
        x = torch.cat(l, dim=0)
        ids_list.append(x)
        df_mask = df[df['section'] == i]['mask']
        l = df_mask.values.tolist()
        x = torch.cat(l, dim=0)
        mask_list.append(x)
    
    df = pd.DataFrame(list(zip(section_list, ids_list, mask_list)), columns = ['section','ids','mask'])
    
    return df        

In [12]:
# Datasetの作成 (ツイート)
# 1. df to tensor
# 2. separate_section
def data_process1(timespan):
    print('-'*5 + 'Create dataset_tlist start!!' + '-'*5)

    print('Reading...')
    dfs = pd.read_csv(f'tweet-transformer/{timespan}/2021-17_s.csv')
    df = pd.read_csv(f'tweet-transformer/{timespan}/2021-17_t.csv')
    df = df.dropna(how='any')
    df = df.reset_index(drop=True)
    
    print('df to tensor...')
    df = tensor_df(df)
    
    print('Separating Section...')
    df = separate_section(df)
        
    train_tdf, test_tdf = train_test_split(df, test_size = 1/7, shuffle=False)
    
    print('Finish!!')
    print(f'{len(train_tdf)=}')
    print(f'{len(test_tdf)=}')
    
    return train_tdf, test_tdf

In [13]:
# Datasetの作成 (価格)
# 1.csv -> 3 tensor
# 2.CreateDataset3
def data_process2(timespan,n):
    print('-'*5 + 'Create dataset_plist start!!' + '-'*5)

    print('Reading...')
    df  = pd.read_csv(f'tweet-transformer/{timespan}/2021-17_b.csv')
    dfs = pd.read_csv(f'tweet-transformer/{timespan}/2021-17_s.csv')

    # 説明変数、目的変数
    df['trend(n+1)'] = df['trend(n)'].shift(-1)
    df['end_price(n)'] = df['open_price(n)'].shift(-1)
    if n >= 2:
        for i in range(1,n):
            df[f'trend(n-{i})'] = df['trend(n)'].shift(i)
            df[f'end_price(n-{i})'] = df['end_price(n)'].shift(i)
    df = df.drop(columns=['open_price(n)'])
    df = df.dropna(how='any')
    df = df.reset_index(drop=True)

    # マージして欠損値を含む行を処理
    df = pd.merge(dfs, df, on="section", how = 'left')
    df = df.reset_index(drop=True)
    df['bool'] = df.isnull().any(axis=1)
    for i in range(0, len(df)):
        if  df['bool'][i] == True:
            df['section'][i] = -1
    df = df.fillna(-1)
    df = df.drop(columns=['bool'])
    print(f'{len(df)=}')

    train_df, test_df = train_test_split(df, test_size = 1/7, shuffle=False)

    # 3つのtensorを作成
    print('Creating Dataset3...')
    section = torch.tensor(train_df['section'].values)
    target  = torch.tensor(train_df['trend(n+1)'].values)
    price   = torch.tensor(train_df.drop(columns=['trend(n+1)','section']).values)
    train_plist = CreateDataset3(section, price, target)

    section = torch.tensor(test_df['section'].values)
    target  = torch.tensor(test_df['trend(n+1)'].values)
    price   = torch.tensor(test_df.drop(columns=['trend(n+1)','section']).values)
    test_plist = CreateDataset3(section, price, target)


    print('Finish!!')
    print(f'{len(train_plist)=}')
    print(f'{len(test_plist)=}')
    
    del df,dfs,section,price,target,train_df,test_df
    gc.collect()
    
    return train_plist, test_plist

In [14]:
# データセット作成 & csv, pickleに保存
# ツイート
'''
tlist = ['1d','12h','4h','1h','30m','15m','5m']
train_section_list     = {'1d':181,'12h':363,'4h':1090,'1h':4361,'30m':8722,'15m':17444,'5m':52333}
test_section_list      = {'1d':31, '12h':61, '4h':182, '1h':727, '30m':1454,'15m':2908, '5m':8723}

for timespan in tlist:
    print('-'*50 + f'{timespan=}' + '-'*50)
    train_tdf, test_tdf = data_process1(timespan)
    print(f'{sys.getsizeof(train_tdf)=}')
    print(f'{sys.getsizeof(test_tdf)=}')
    train_tdf.to_csv(f'../../external_drive/pandas/{timespan}/train_tdf.csv', index=False)
    test_tdf.to_csv(f'../../external_drive/pandas/{timespan}/test_tdf.csv', index=False) 
    write_pickle(f'../../external_drive/pickle/{timespan}/train_tdf.pickle',train_tdf)
    write_pickle(f'../../external_drive/pickle/{timespan}/test_tdf.pickle',test_tdf)
'''

"\ntlist = ['1d','12h','4h','1h','30m','15m','5m']\ntrain_section_list     = {'1d':181,'12h':363,'4h':1090,'1h':4361,'30m':8722,'15m':17444,'5m':52333}\ntest_section_list      = {'1d':31, '12h':61, '4h':182, '1h':727, '30m':1454,'15m':2908, '5m':8723}\n\nfor timespan in tlist:\n    print('-'*50 + f'{timespan=}' + '-'*50)\n    train_tdf, test_tdf = data_process1(timespan)\n    print(f'{sys.getsizeof(train_tdf)=}')\n    print(f'{sys.getsizeof(test_tdf)=}')\n    train_tdf.to_csv(f'../../external_drive/pandas/{timespan}/train_tdf.csv', index=False)\n    test_tdf.to_csv(f'../../external_drive/pandas/{timespan}/test_tdf.csv', index=False) \n    write_pickle(f'../../external_drive/pickle/{timespan}/train_tdf.pickle',train_tdf)\n    write_pickle(f'../../external_drive/pickle/{timespan}/test_tdf.pickle',test_tdf)\n"

In [15]:
# データセット作成 & pickleに保存
# 価格
'''
nlist = [1,2,3,4,5,6,7,8,9,10]
tlist = ['1d','12h','4h','1h','30m','15m','5m']

for timespan, n in itertools.product(tlist, nlist):   

    print('-'*50 + f'{timespan=} / {n=}' + '-'*50)
    train_plist,test_plist = data_process2(timespan,n)
    write_pickle(f'../../external_drive/pickle/{timespan}/train_plist_{n}.pickle',train_plist)
    write_pickle(f'../../external_drive/pickle/{timespan}/test_plist_{n}.pickle',test_plist)
    
    del train_plist, test_plist
    gc.collect()
'''

"\nnlist = [1,2,3,4,5,6,7,8,9,10]\ntlist = ['1d','12h','4h','1h','30m','15m','5m']\n\nfor timespan, n in itertools.product(tlist, nlist):   \n\n    print('-'*50 + f'{timespan=} / {n=}' + '-'*50)\n    train_plist,test_plist = data_process2(timespan,n)\n    write_pickle(f'../../external_drive/pickle/{timespan}/train_plist_{n}.pickle',train_plist)\n    write_pickle(f'../../external_drive/pickle/{timespan}/test_plist_{n}.pickle',test_plist)\n    \n    del train_plist, test_plist\n    gc.collect()\n"

In [9]:
# parametator for Net
ntokens = len(vocab)  # size of vocabulary
d_model = 512   # embedding dimension
nhead   = 8     # number of heads in nn.MultiheadAttention
d_hid   = 2048  # dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 6     # number of nn.TransformerEncoderLayer in nn.TransformerEncoder
dropout = 0.2   # dropout probability
lstm_input_dim  = 5
lstm_hidden_dim = 16

In [10]:
# Transformer-LSTMモデルの概要
class Net(nn.Module):

    def __init__(self,
                 ntoken: int,
                 d_model: int,
                 nhead: int,
                 d_hid: int,
                 nlayers: int,
                 lstm_input_dim: int, 
                 lstm_hidden_dim: int,
                 dropout: float = 0.5):

        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model
        self.embedding = nn.Embedding(ntoken,
                                d_model)
        self.pos_encoder = PositionalEncoding(d_model,
                                              dropout)
        encoder_layers = TransformerEncoderLayer(d_model,
                                                 nhead,
                                                 d_hid,
                                                 dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers,
                                                      nlayers)
        self.dense1 = nn.Linear(d_model,3)
        self.softmax = nn.Softmax(dim=1)
        
        self.input_dim = lstm_input_dim
        self.hidden_dim = lstm_hidden_dim
        self.lstm = nn.LSTM(input_size=lstm_input_dim, 
                            hidden_size=lstm_hidden_dim,
                            num_layers=1,
                            batch_first=True)
        self.dense2 = nn.Linear(lstm_hidden_dim,3)

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.dense1.bias.data.zero_()
        self.dense1.weight.data.uniform_(-initrange, initrange)

    #データの流れ
    def forward(self,timespan, n, t_start, t_end, tdf, plist, train_test_flag):

        tbatch_size_list = {'4h':512, '30m':256, '5m':64}
        tbatch_size = tbatch_size_list[timespan]
        if timespan == '5m':
            r = 30
        elif timespan == '30m':
            r = 40
        else:
            r = 60
        idx_counter_list=[]
        
        if train_test_flag == 1:
            t_start += train_num_section 
            t_end  += train_num_section
            
        for j in range(t_start,t_end):   
            tlist = CreateDataset1(tdf['ids'][j], tdf['mask'][j])
            tbatches = iter(DataLoader(tlist, batch_size=tbatch_size_list[timespan], shuffle=True))
            idx_counter = 0

            for idx,batch in enumerate(tbatches): 
                if idx % r == 0:
                    idx_counter += 1
                    ids  =  batch['ids'].to(device)  # [batch_size, seq_len]
                    mask =  torch.t(batch['mask']).to(device) # [seq_len, batch_size]
                    
                    # Transformerによるテキストの3値分類           
                    x = self.embedding(ids) * math.sqrt(self.d_model) # [batch_size, seq_len, d_model]
                    x = self.pos_encoder(x) # [batch_size, seq_len, d_model]
                    x = self.transformer_encoder(src=x, src_key_padding_mask=mask) # [batch_size, seq_len, d_model]
                    x[x != x] = 0 #Nanを0に置き換え
                    x = x.mean(dim=1) # [batch_size, d_model]
                    x = self.dense1(x) # [batch_size, 3]
                    
                    x = softmax(x) # [batch_size, 3] x=prob
                    write_pickle_quickly(f'../../external_drive/pickle/temp/{j}_{idx}.pickle',x.to('cpu'))
                    del ids, mask, x
                    torch.cuda.empty_cache()
                    gc.collect()
            idx_counter_list.append(idx_counter)

        prob_section = []
        for j in range(t_start,t_end):
            p_tbatch = []
            for idx in range(0, idx_counter_list[j-t_start]):
                prob = read_pickle_quickly(f'../../external_drive/pickle/temp/{j}_{idx*r}.pickle') # [batch_size, 3]
                p_tbatch.append(prob)
                os.remove(f'../../external_drive/pickle/temp/{j}_{idx*r}.pickle')
            x = torch.cat(p_tbatch, dim=0) # [batch_size*num_tbatches, 3]
            x = x.sum(dim=0) # [3]
            prob_section.append(x)

        src    = []
        target = []
        
        if train_test_flag == 1:
            t_start -= train_num_section 
            t_end  -= train_num_section

        for j in range(t_start+n-1,t_end):
            if plist[j]['section'] != -1:
                l=[]
                x = plist[j]['src'] #[2n]
                y = torch.cat(prob_section[j-(t_start+n-1): j-(t_start+n-1)+n], dim=-1) # [3n]
                for k in range (0,n):   
                    z = torch.cat((x[k*2:(k+1)*2], y[3*n-(k+1)*3:3*n-k*3]), dim=-1).unsqueeze(0) # [1,5]
                    l.append(z)
                z = torch.cat(l, dim=0).unsqueeze(0) # [1,n,5]
                src.append(z)
                target.append(plist[j]['target'].unsqueeze(0))
        src = torch.cat(src, dim=0).to(torch.float).to(device) # [batch_size, n, 5]
        target = torch.cat(target, dim=-1).to(torch.long) # [batch_size]
            
        # LSTMによるテキスト＋価格の３値分類
        _, x = self.lstm(src)
        x = self.dense2(x[0].view(src.size(0), -1))

        return x, target

In [11]:
# PositionalEncodingの概要
class PositionalEncoding(nn.Module):

    def __init__(self,
                 d_model: int,
                 dropout: float = 0.1,
                 max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        '''
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]
        '''
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

In [12]:
# paramator for training & evaluation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
softmax = nn.Softmax(dim=1)
criterion = nn.CrossEntropyLoss()
torch.manual_seed(0)

<torch._C.Generator at 0x7f38d9d46fd0>

In [13]:
# training
def train(model, timespan, n, train_tdf, train_plist, epoch):
    model.train()
    train_test_flag = 0
    
    log_interval = math.ceil(train_num_batches/30)*10
    batch_counter = 0
    train_loss = 0
    train_correct = 0
    train_count = 0
    
    if timespan == '4h':
        q = 4
    elif timespan == '30m':
        q = 9
    else:
        q = 25
    
    batch_start_time = time.time()
    
    for i in range(0, train_num_batches):
        if (i+epoch)%q == 0:
            if i*batch_size-n+1 >= 0:      
                if i != (train_num_batches-1):
                    t_start = i*batch_size-n+1
                    t_end   = i*batch_size+batch_size
                else:
                    t_start = i*batch_size-n+1
                    t_end   = len(train_plist)
            else:
                if i != (train_num_batches-1):
                    t_start = 0
                    t_end   = i*batch_size+batch_size
                else:
                    t_start = 0
                    t_end   = len(train_plist)

            predictions, target = model(timespan,n, t_start, t_end, train_tdf, train_plist, train_test_flag)
            prob = softmax(predictions)
            targets = target.to(device)
            loss = criterion(predictions, targets)

            correct = prob.argmax(axis=1) == targets
            train_correct += correct.sum().item()
            train_count += correct.size(0)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()

        batch_counter += 1

        if batch_counter % log_interval == 0 or batch_counter == train_num_batches:
            lr = scheduler.get_last_lr()[0]
            s_per_batch = (time.time() - batch_start_time) / log_interval
            cur_loss = train_loss / log_interval
            cur_acc = train_correct / train_count
            print(f'| epoch {epoch:3d} | {batch_counter:5d}/{train_num_batches:5d} batches | '
                  f'lr {lr:1.5f} | s/batch {s_per_batch:5.2f} | '
                  f'loss {cur_loss:5.3f} | accuracy {cur_acc:8.3f} |')
            total_loss = 0
            batch_start_time = time.time()

In [14]:
# evaluation (val, test)
def evaluate(model, timespan, n, test_tdf, test_plist, epoch):
    model.eval()  
    train_test_flag = 1
    
    eval_loss = 0
    eval_correct = 0
    eval_acc = 0
    eval_count = 0
    
    if timespan == '4h':
        q = 3
        if epoch == -1:
            q = 1
    elif timespan == '30m':
        q = 4
        if epoch == -1:
            q = 2
    else:
        q = 8
        if epoch == -1:
            q = 3
    
    with torch.no_grad():
        for i in range(0, test_num_batches):
            if (i+epoch)%q == 0:      
                if i*batch_size-n+1 >= 0:      
                    if i != (test_num_batches-1):
                        t_start = i*batch_size-n+1
                        t_end   = i*batch_size+batch_size
                    else:
                        t_start = i*batch_size-n+1
                        t_end   = len(test_plist)
                else:
                    if i != (test_num_batches-1):
                        t_start = 0
                        t_end   = i*batch_size+batch_size
                    else:
                        t_start = 0
                        t_end   = len(test_plist)

                predictions, target = model(timespan,n, t_start, t_end, test_tdf, test_plist, train_test_flag)
                prob = softmax(predictions)
                targets = target.to(device)
                loss = criterion(predictions, targets)
                
                eval_loss += loss.item()
                correct = prob.argmax(axis=1) == targets
                eval_correct += correct.sum().item()
                eval_count += correct.size(0)
                
    eval_acc = eval_correct / eval_count
    
    print(f'| loss {eval_loss:5.3f}| accuracy {eval_acc:8.3f} |')
     
    return eval_loss, eval_acc

In [15]:
# main
nlist = [2,3,4]
tlist = ['4h','30m','5m']
#tlist = ['5m','30m','4h']
batch_size_list        = {'4h':32,  '30m':128, '5m':256}
train_num_section_list = {'4h':1090,'30m':8722,'5m':52333}
test_num_section_list  = {'4h':182, '30m':1454,'5m':8723}

for n, timespan in itertools.product(nlist,tlist):

    print('-'*50 + f'{timespan=} / {n=}' + '-'*50)
    train_num_section = train_num_section_list[timespan]
    test_num_section  = test_num_section_list[timespan]
    batch_size=batch_size_list[timespan]
    train_num_batches = (train_num_section // batch_size) +1
    test_num_batches  = (test_num_section // batch_size) +1

    lr = 1e-3
    model = Net(ntokens, d_model, nhead, d_hid, nlayers, lstm_input_dim, lstm_hidden_dim, dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
    best_val_loss = float('inf')
    epochs = 50
    best_model = None
    
    train_tdf   = read_pickle_quickly(f'../../external_drive/pickle/{timespan}/train_tdf.pickle')
    test_tdf    = read_pickle_quickly(f'../../external_drive/pickle/{timespan}/test_tdf.pickle')
    train_plist = read_pickle_quickly(f'../../external_drive/pickle/{timespan}/train_plist_{n}.pickle')
    test_plist  = read_pickle_quickly(f'../../external_drive/pickle/{timespan}/test_plist_{n}.pickle')

    dt_start = datetime.datetime.now()
    print(datetime.datetime.now())
    print('*'*45 + 'training start' + '*'*45)

    # training & test roop
    for epoch in range(1, epochs + 1):
        epoch_start = datetime.datetime.now()

        train(model, timespan, n, train_tdf, train_plist, epoch)
        val_loss, val_acc = evaluate(model, timespan, n, test_tdf, test_plist, epoch)
        
        epoch_end = datetime.datetime.now()
        elapsed = epoch_end - epoch_start

        print('-' * 87)
        print(f'| end of epoch {epoch:3d} | time: {elapsed}s | val loss：{val_loss:5.3f} | val accuracy：{val_acc:8.3f} |')
        print('-' * 87)
        
        # 結果をcsvに保存
        df_log = pd.read_csv('tweet-transformer/loss_log.csv')
        s = pd.Series([epoch_start,
                       epoch_end,
                       elapsed,
                       timespan,
                       n,
                       epoch,
                       val_loss,
                       val_acc],
                index=['start_time',
                       'end_time',
                       'elapsed',
                       'timespan',
                       'range_of_data',
                       'epoch',
                       'val_loss',
                       'val_accuracy'])
        df_log = df_log.append(s, ignore_index=True)
        df_log.to_csv('tweet-transformer/loss_log.csv',index=False)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = copy.deepcopy(model)

        scheduler.step()

    dt_end = datetime.datetime.now()
    elapsed = dt_end - dt_start
    print(datetime.datetime.now())    
    print('*'*30 + f'Finish! training time：{elapsed}s' + '*'*30)

    # test
    test_loss, test_acc = evaluate(best_model, timespan, n, test_tdf, test_plist, -1)
    print('=' * 89)
    print(f'| End of training | test loss：{test_loss:5.3f} | '
          f'test accuracy：{test_acc:8.3f}')
    print('=' * 89)
    
    # 結果をcsvに保存
    df_log = pd.read_csv('tweet-transformer/transformer_log.csv')
    s = pd.Series([dt_start,
                   dt_end,
                   elapsed,
                   timespan,
                   n,
                   test_loss,
                   test_acc],
            index=['start_time',
                   'end_time',
                   'elapsed',
                   'timespan',
                   'range_of_data',
                   'test_loss',
                   'test_accuracy'])
    df_log = df_log.append(s, ignore_index=True)
    df_log.to_csv('tweet-transformer/transformer_log.csv',index=False)

--------------------------------------------------timespan='4h' / n=2--------------------------------------------------
2021-11-15 08:14:52.797787
*********************************************training start*********************************************
| epoch   1 |    20/   35 batches | lr 0.00100 | s/batch 103.53 | loss 0.286 | accuracy    0.306 |
| epoch   1 |    35/   35 batches | lr 0.00100 | s/batch 61.80 | loss 0.454 | accuracy    0.324 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.358| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch   1 | time: 1:03:51.466760s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   2 |    20/   35 batches | lr 0.00095 | s/batch 103.45 | loss 0.285 | accuracy    0.294 |
| epoch   2 |    35/   35 batches | lr 0.00095 | s/batch 66.38 | loss 0.503 | accuracy    0.306 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.448| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch   2 | time: 1:06:42.142238s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   3 |    20/   35 batches | lr 0.00090 | s/batch 103.55 | loss 0.284 | accuracy    0.300 |
| epoch   3 |    35/   35 batches | lr 0.00090 | s/batch 83.97 | loss 0.506 | accuracy    0.319 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.343| accuracy    0.254 |
---------------------------------------------------------------------------------------
| end of epoch   3 | time: 1:12:35.898256s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   4 |    20/   35 batches | lr 0.00086 | s/batch 103.28 | loss 0.287 | accuracy    0.233 |
| epoch   4 |    35/   35 batches | lr 0.00086 | s/batch 82.96 | loss 0.507 | accuracy    0.296 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.326| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch   4 | time: 1:10:53.925440s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   5 |    20/   35 batches | lr 0.00081 | s/batch 104.72 | loss 0.282 | accuracy    0.306 |
| epoch   5 |    35/   35 batches | lr 0.00081 | s/batch 62.38 | loss 0.449 | accuracy    0.324 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.402| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch   5 | time: 1:06:15.394400s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   6 |    20/   35 batches | lr 0.00077 | s/batch 104.32 | loss 0.282 | accuracy    0.294 |
| epoch   6 |    35/   35 batches | lr 0.00077 | s/batch 67.02 | loss 0.499 | accuracy    0.306 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.315| accuracy    0.254 |
---------------------------------------------------------------------------------------
| end of epoch   6 | time: 1:07:31.375869s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   7 |    20/   35 batches | lr 0.00074 | s/batch 104.78 | loss 0.281 | accuracy    0.300 |
| epoch   7 |    35/   35 batches | lr 0.00074 | s/batch 85.08 | loss 0.502 | accuracy    0.319 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.302| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch   7 | time: 1:12:18.768554s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   8 |    20/   35 batches | lr 0.00070 | s/batch 104.27 | loss 0.283 | accuracy    0.233 |
| epoch   8 |    35/   35 batches | lr 0.00070 | s/batch 84.27 | loss 0.503 | accuracy    0.296 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.368| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch   8 | time: 1:13:26.673895s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch   9 |    20/   35 batches | lr 0.00066 | s/batch 106.60 | loss 0.280 | accuracy    0.306 |
| epoch   9 |    35/   35 batches | lr 0.00066 | s/batch 63.28 | loss 0.446 | accuracy    0.324 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.297| accuracy    0.254 |
---------------------------------------------------------------------------------------
| end of epoch   9 | time: 1:07:04.254091s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  10 |    20/   35 batches | lr 0.00063 | s/batch 105.85 | loss 0.280 | accuracy    0.294 |
| epoch  10 |    35/   35 batches | lr 0.00063 | s/batch 67.87 | loss 0.497 | accuracy    0.306 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.284| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch  10 | time: 1:06:55.863391s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  11 |    20/   35 batches | lr 0.00060 | s/batch 105.76 | loss 0.280 | accuracy    0.300 |
| epoch  11 |    35/   35 batches | lr 0.00060 | s/batch 85.82 | loss 0.500 | accuracy    0.319 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.342| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch  11 | time: 1:14:27.698571s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  12 |    20/   35 batches | lr 0.00057 | s/batch 104.64 | loss 0.281 | accuracy    0.233 |
| epoch  12 |    35/   35 batches | lr 0.00057 | s/batch 84.21 | loss 0.500 | accuracy    0.296 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.283| accuracy    0.254 |
---------------------------------------------------------------------------------------
| end of epoch  12 | time: 1:13:24.339207s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  13 |    20/   35 batches | lr 0.00054 | s/batch 105.99 | loss 0.279 | accuracy    0.306 |
| epoch  13 |    35/   35 batches | lr 0.00054 | s/batch 63.01 | loss 0.445 | accuracy    0.324 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.272| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch  13 | time: 1:05:19.368691s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  14 |    20/   35 batches | lr 0.00051 | s/batch 105.69 | loss 0.279 | accuracy    0.294 |
| epoch  14 |    35/   35 batches | lr 0.00051 | s/batch 67.92 | loss 0.496 | accuracy    0.306 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.321| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch  14 | time: 1:08:30.573982s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  15 |    20/   35 batches | lr 0.00049 | s/batch 105.96 | loss 0.279 | accuracy    0.300 |
| epoch  15 |    35/   35 batches | lr 0.00049 | s/batch 85.82 | loss 0.499 | accuracy    0.319 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.272| accuracy    0.254 |
---------------------------------------------------------------------------------------
| end of epoch  15 | time: 1:14:22.249518s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  16 |    20/   35 batches | lr 0.00046 | s/batch 105.23 | loss 0.280 | accuracy    0.233 |
| epoch  16 |    35/   35 batches | lr 0.00046 | s/batch 84.55 | loss 0.499 | accuracy    0.296 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.262| accuracy    0.245 |
---------------------------------------------------------------------------------------
| end of epoch  16 | time: 1:12:16.790003s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  17 |    20/   35 batches | lr 0.00044 | s/batch 106.48 | loss 0.278 | accuracy    0.306 |
| epoch  17 |    35/   35 batches | lr 0.00044 | s/batch 63.41 | loss 0.444 | accuracy    0.324 |


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| loss 2.307| accuracy    0.141 |
---------------------------------------------------------------------------------------
| end of epoch  17 | time: 1:07:14.740384s | val loss：1.000 | val accuracy：   2.000 |
---------------------------------------------------------------------------------------
| epoch  18 |    20/   35 batches | lr 0.00042 | s/batch 105.86 | loss 0.278 | accuracy    0.294 |


KeyboardInterrupt: 

In [47]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device0 = torch.device("cuda:0")
device1 = torch.device("cuda:1")
print(torch.cuda.is_available())
print(device)

True
cuda


In [None]:
print(sys.getsizeof(x))
# これでtrain_tlistなどを読み込んだ時の容量を確認

In [14]:
#log リセット
'''
# test用
df = pd.DataFrame(columns=['start_time',
                           'end_time',
                           'elapsed',
                           'timespan',
                           'range_of_data',
                           'test_loss',
                           'test_accuracy'])
df.to_csv('tweet-transformer/transformer_log.csv',index=False)

# epoch毎のvalidation用
df = pd.DataFrame(columns=['start_time',
                           'end_time',
                           'elapsed',
                           'timespan',
                           'range_of_data',
                           'epoch',
                           'val_loss',
                           'val_accuracy'])
df.to_csv('tweet-transformer/loss_log.csv',index=False)
'''