In [1]:
import math
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import Dataset, DataLoader
from torchtext import data
from torchtext.legacy import data
from torchtext.data.utils import get_tokenizer
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import pandas as pd
import itertools
import datetime
import shutil
import pickle
import random
import time
import copy
import sys
import gc
import os

In [2]:
# pickle書き込み
# ログなしVer
def write_pickle_quickly(filepath, data):
    with open(filepath, 'wb') as p:
        pickle.dump(data,p)
        
# pickle読み出し
# ログなしVer
def read_pickle_quickly(filepath):
    with open(filepath, 'rb') as p:
        data = pickle.load(p)
    return data

In [3]:
d_model = 512   # embedding dimension

In [10]:
# Transformer-LSTMモデルの概要
class Net(nn.Module):

    def __init__(self,
                 d_model: int):

        super().__init__()
        self.dense1 = nn.Linear(128,d_model)
        self.dense2 = nn.Linear(d_model,d_model*2)
        self.dense3 = nn.Linear(d_model*2,d_model)
        self.dense4 = nn.Linear(d_model,3)
        
        self.dense5 = nn.Linear(3,16)
        self.dense6 = nn.Linear(16,3)

        self.init_weights()

    def init_weights(self):
        initrange = 0.1
        self.dense1.bias.data.zero_()
        self.dense1.weight.data.uniform_(-initrange, initrange)
        self.dense2.bias.data.zero_()
        self.dense2.weight.data.uniform_(-initrange, initrange)
        self.dense3.bias.data.zero_()
        self.dense3.weight.data.uniform_(-initrange, initrange)
        self.dense4.bias.data.zero_()
        self.dense4.weight.data.uniform_(-initrange, initrange)
        self.dense5.bias.data.zero_()
        self.dense5.weight.data.uniform_(-initrange, initrange)
        self.dense6.bias.data.zero_()
        self.dense6.weight.data.uniform_(-initrange, initrange)

    #データの流れ
    def forward(self, ids):# [batch_size, 3]
        for i in range(0,5):
            x = self.dense1(ids) # [batch_size, d_model]
            x = self.dense2(x) # [batch_size, d_model1*2]
            x = self.dense3(x) # [batch_size, d_model]
            x = self.dense4(x) # [batch_size, 3]
            print(f'1.{x=}')
            write_pickle_quickly(f'pickle/test_{i}.pickle', x)
        
        l = []
        for i in range(0,5):
            x = read_pickle_quickly(f'pickle/test_{i}.pickle')
            l.append(x.unsqueeze(0))
        
        x = torch.cat(l, dim=0).to(torch.float).to(device)         
        x = x.mean(dim=0)
        x = self.dense5(x) # [batch_size, d_model]
        x = self.dense6(x) # [batch_size, d_model1*2]
        print(f'2.{x=}')
        return x

In [11]:
# paramator for training & evaluation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 1e-3
model = Net(d_model).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
torch.manual_seed(0)

<torch._C.Generator at 0x7f4b680eefd0>

In [12]:
# training
def train(model):
    model.train()
    x = torch.randn(128)
    
    for i in range(0, 5):
        predictions = model(x.to(device)).unsqueeze(0)
        targets = torch.tensor([i%3]).to(device)
        loss = criterion(predictions, targets)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

In [None]:
# 初期値
for param in model.parameters():
    print(param)

In [None]:
# main
epochs = 5
dt_start = datetime.datetime.now()
print(datetime.datetime.now())

print('*'*45 + 'training start' + '*'*45)

# training & test roop
for epoch in range(1, epochs + 1):
    print('-'*100)
    epoch_start = time.time()
    train(model)
    for param in model.parameters():
        print(param)

In [None]:
'''        
        l=[]
        for i in range(0,5):
            x = self.dense1(ids) # [batch_size, d_model]
            x = self.dense2(x) # [batch_size, d_model1*2]
            x = self.dense3(x) # [batch_size, d_model]
            x = self.dense4(x) # [batch_size, 3]
            l.append(x.unsqueeze(0).to('cpu'))    
        x = torch.cat(l, dim=0).to(torch.float).to(device)         
        x = x.mean(dim=0)
        x = self.dense5(x) # [batch_size, d_model]
        x = self.dense6(x) # [batch_size, d_model1*2]
'''