In [19]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import scipy.io as scio
import math
import time
from models.XSformer  import XSformer
from util.data_loader import My_dataset
from util.epoch_timer import epoch_time
"""
参数表
"""
filename = 'dataset_12000_re.mat'
dataNum = 12000
# model para
nhead           = 2
d_model         = 6
dim_feedforward = 64
dropout         = 0.1
n_layers        = 3
mlp_hidden      = 16
LR              = 0.1
batchsize       = 3200
# adam para
init_lr         = 1e-5
weight_decay    = 5e-4
adam_eps        = 5e-9
# scheduler para
factor          = 0.9
patience        = 10
# others
warmup          = 100
epoch           = 10
clip            = 1.0
inf             = float('inf')

In [20]:
"""
设置模型，优化器，损失函数
"""
model    = XSformer(nhead           = nhead,
                    d_model         = d_model,
                    dim_feedforward = dim_feedforward,
                    dropout         = dropout,
                    n_layers        = n_layers,
                    mlp_hidden      = mlp_hidden,
                    LR              = LR)
optimizer = Adam(params       = model.parameters(),
                 lr           = init_lr,
                 weight_decay = weight_decay,
                 eps          = adam_eps)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 verbose=True,
                                                 factor=factor,
                                                 patience=patience)
criterion = nn.L1Loss()
# 输出模型大小并初始化        
print(f'The model has {count_parameters(model):,} trainable parameters')
model.apply(initialize_weights)  
#out = model(x,y)

The model has 6,873 trainable parameters


  nn.init.kaiming_uniform(m.weight.data)


XSformer(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=6, out_features=6, bias=True)
          )
          (linear1): Linear(in_features=6, out_features=64, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=64, out_features=6, bias=True)
          (norm1): LayerNorm((6,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((6,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=6, out_features=6, bias=True)
          )
          (linear1): Linear(in_features=6, out_features=64, bias=T

In [21]:
"""
subfunction
"""
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.kaiming_uniform(m.weight.data)
        
def train(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    for i, (src, trg, out) in enumerate(iterator):
        #src = batch.src
        #trg = batch.trg
        # reshape data
        src = src.view(-1,4,d_model)  #  batch*24       ---->   batch* len(4)* d_model(6)
        trg = trg.unsqueeze(1)  #  batch*d_model        ---->   batch* len(1)* d_model(6)
        x = torch.tensor(src, dtype=torch.float)
        y = torch.tensor(trg, dtype=torch.float)
        z = torch.tensor(out, dtype=torch.float)   
        # 
        optimizer.zero_grad()

        #output = model(src, trg)
        output = model(x, y)
        #output_reshape = output.contiguous().view(-1, output.shape[-1])
        #trg = trg[:, 1:].contiguous().view(-1)
        out = out.contiguous().view(-1,out.size(-1))  # batch*1*1 ----> batch*1
        z = z.contiguous().view(-1,1)               # batch ----> batch*1
        
        loss = criterion(output, z)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
        print('step :', round((i / len(iterator)) * 100, 2), '% , loss :', loss.item())
    return epoch_loss / len(iterator)

def run(total_epoch, best_loss):
    train_losses, test_losses, bleus = [], [], []
    for step in range(total_epoch):
        start_time = time.time()
        train_loss = train(model, train_batch, optimizer, criterion, clip)
        end_time = time.time()
        #if step > warmup:
        #   scheduler.step(valid_loss)

        train_losses.append(train_loss)
        #test_losses.append(valid_loss)
        #bleus.append(bleu)
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        f = open('result/train_loss.txt', 'w')
        f.write(str(train_losses))
        f.close()        
        print(f'Epoch: {step + 1} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        
        

In [22]:
loader = My_dataset(filename,d_model,dataNum)
trainSet, validSet, testSet = loader.make_dataset(dataNum)
train_batch, valid_batch, test_batch = loader.make_iter(trainSet, validSet, testSet,
                                                     batchsize=batchsize) 

dataset initializing done


In [23]:
run(total_epoch=epoch, best_loss=inf)

  x = torch.tensor(src, dtype=torch.float)
  y = torch.tensor(trg, dtype=torch.float)
  z = torch.tensor(out, dtype=torch.float)
  return F.l1_loss(input, target, reduction=self.reduction)


step : 0.0 % , loss : 12.950315475463867
step : 33.33 % , loss : 12.859456062316895
step : 66.67 % , loss : 12.883637428283691
Epoch: 1 | Time: 0m 0s
	Train Loss: 12.898 | Train PPL: 399433.666
step : 0.0 % , loss : 12.944225311279297
step : 33.33 % , loss : 12.849969863891602
step : 66.67 % , loss : 12.851941108703613
Epoch: 2 | Time: 0m 0s
	Train Loss: 12.882 | Train PPL: 393188.896
step : 0.0 % , loss : 12.926239013671875
step : 33.33 % , loss : 12.843324661254883
step : 66.67 % , loss : 12.878366470336914
Epoch: 3 | Time: 0m 0s
	Train Loss: 12.883 | Train PPL: 393424.076
step : 0.0 % , loss : 12.909133911132812
step : 33.33 % , loss : 12.827638626098633
step : 66.67 % , loss : 12.842483520507812
Epoch: 4 | Time: 0m 0s
	Train Loss: 12.860 | Train PPL: 384520.360
step : 0.0 % , loss : 12.90361213684082
step : 33.33 % , loss : 12.82951831817627
step : 66.67 % , loss : 12.812938690185547
Epoch: 5 | Time: 0m 0s
	Train Loss: 12.849 | Train PPL: 380290.120
step : 0.0 % , loss : 12.8970956