In [1]:
from utils import *

## 1. Hyperparameters

In [2]:
# Hyperparameters and paths used by the training cell below.

# Input columns fed to the model; the model's input width follows from this list.
features = [
    'SOH',
    'voltage_measured',
    'current_measured',
    'temperature_measured',
    'time',
]
input_size = len(features)

# Network shape.
hidden_size = 128
num_layers = 1
output_size = 1

# Data / optimisation settings.
seq_len = 20           # length of the prediction sequence
batch_size = 1         # author's note: 1 * len(every_file)
epoch = 500            # number of training epochs (author's note: 1 * len(train_directory))
learning_rate = 0.001  # TODO(author): consider an adaptive learning rate

# Where the trained weights are written.
save_path = f'seq{seq_len}_.pth'

# Dataset locations.
train_directory = '../datasets/train/'
val_directory = '../datasets/val/'
test_directory = '../datasets/alldataset/'

In [3]:
# train

# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if use_gpu else "cpu")
print('Training on GPU.' if use_gpu else 'No GPU available, training on CPU.')

# Build the model from the hyperparameters above and move it to the selected device.
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
).to(device)

# Training and validation sets share the same window length and feature columns.
dataset_kwargs = dict(seq_len=seq_len, features=features)

train_dataset = LoadDataset(train_directory, **dataset_kwargs)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

val_dataset = LoadDataset(val_directory, **dataset_kwargs)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Loss function (mean squared error) and optimizer.
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Output directory (a TensorBoard SummaryWriter could log to "<work_dir>/logs").
work_dir = './LSTM'

# checkponits = epoch // 5

# Train the model for `epoch` epochs, evaluating on the validation set after each one.
#
# Each epoch prints the mean training and validation loss. When both fall under a
# quality threshold the current weights are saved under a tier-prefixed filename
# ('EX_', 'A_' or 'B_'); the weights after the final epoch are always saved as
# '<stem>last.pth'.

# Checkpoint tiers, strictest first: (loss threshold, filename prefix). A model is
# saved under the first tier whose threshold BOTH average losses are below, so the
# tier is decided by the worse of the two losses and no loss value falls in a gap
# between tiers. NaN losses compare False and therefore never trigger a save.
checkpoint_tiers = ((0.0025, 'EX_'), (0.0036, 'A_'), (0.0049, 'B_'))
checkpoint_stem = save_path[:-4]  # save_path with its last 4 characters ('.pth') removed

# Use a dedicated loop variable so the `epoch` hyperparameter is not overwritten;
# otherwise re-running this cell would train for a different number of epochs.
for epoch_idx in range(epoch):
    print("-------epoch  {} -------".format(epoch_idx))

    # ---- training step ----
    model.train()
    total_train_loss = 0
    for train_data, train_data_real in train_dataloader:
        # NOTE(review): squeeze() without a dim drops every size-1 axis, not only
        # the batch axis added by the DataLoader -- confirm against LoadDataset shapes.
        train_data = torch.squeeze(train_data).to(device)
        train_data_real = torch.squeeze(train_data_real).to(device)

        output, _ = model(train_data)
        output = torch.squeeze(output)
        optimizer.zero_grad()
        train_loss = criterion(output, train_data_real)
        train_loss.backward()
        optimizer.step()

        total_train_loss += train_loss.item()
    avg_train_loss = total_train_loss/len(train_dataloader)
    # Author's note: a NaN here may mean seq_len is too long, since some data
    # files are shorter than seq_len.
    print("train set Loss: {}".format(avg_train_loss))

    # ---- validation step ----
    model.eval()
    total_val_loss = 0
    with torch.no_grad():  # no gradients needed for evaluation: faster, less memory
        for val_data, val_data_real in val_dataloader:
            val_data = torch.squeeze(val_data).to(device)
            val_data_real = torch.squeeze(val_data_real).to(device)

            val_output, _ = model(val_data)
            val_output = torch.squeeze(val_output)
            val_loss = criterion(val_output, val_data_real)
            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss/len(val_dataloader)
    print("val set Loss: {}".format(avg_val_loss))

    # ---- tiered checkpoint ----
    for loss_threshold, tier_prefix in checkpoint_tiers:
        if avg_train_loss < loss_threshold and avg_val_loss < loss_threshold:
            torch.save(model.state_dict(), tier_prefix + checkpoint_stem + '.pth')
            print("save model:epoch {}".format(epoch_idx))
            break

    # Losses of the most recent epoch, kept at notebook scope.
    last_train_loss = avg_train_loss
    last_val_loss = avg_val_loss

# Always save the weights from the final epoch.
torch.save(model.state_dict(), checkpoint_stem + 'last.pth')

Training on GPU.
-------epoch  0 -------
train set Loss: 0.3081004764139652
val set Loss: 0.21314329405625662
-------epoch  1 -------
train set Loss: 0.24179610927589237
val set Loss: 0.05799399440487226
-------epoch  2 -------
train set Loss: 0.17695694818161428
val set Loss: 0.014206769255300363
-------epoch  3 -------
train set Loss: 0.1385323558934033
val set Loss: 0.041642249251405396
-------epoch  4 -------
train set Loss: 0.13191318767145277
val set Loss: 0.06665029749274254
-------epoch  5 -------
train set Loss: 0.11686185862869024
val set Loss: 0.05226448364555836
-------epoch  6 -------
train set Loss: 0.0709448061697185
val set Loss: 0.02043652596573035
-------epoch  7 -------
train set Loss: 0.05856394522532355
val set Loss: 0.03357184290265044
-------epoch  8 -------
train set Loss: 0.033917240211740134
val set Loss: 0.008113949676044285
-------epoch  9 -------
train set Loss: 0.027575690841767937
val set Loss: 0.005939080651539068
-------epoch  10 -------
train set Loss: