In [3]:
import os
import sys
import json
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import joblib

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader, Dataset


import warnings

warnings.filterwarnings("ignore")


# 定义函数
def normalization(data_series):
    scale = MinMaxScaler()
    data_n = np.array(data_series).reshape(-1, 1)
    data_n = scale.fit_transform(data_n)
    data_n = pd.DataFrame(data_n, index=data_series.index)
    return np.array(data_n).reshape(-1, 1), scale

def create_data(data_n, inp_len, out_len, step_len):
    inp_data, dec_inp, out_data = [], [], []
    for i in range(0, data_n.shape[0] - inp_len - out_len, step_len): # 没有缺失值
        if not np.isnan(data_n[i:i + inp_len]).any() and not np.isnan(data_n[i + inp_len:i + inp_len + out_len]).any():
            inp_data.append(data_n[i:i + inp_len, 0])
            # dec_inp.append(data_n[i:i + inp_len, 1])
            out_data.append(data_n[i + inp_len:i + inp_len + out_len, 0])
    inp_data, out_data = np.stack(inp_data), np.stack(out_data)
    return inp_data.reshape(-1, out_len, 1), out_data.reshape(-1, out_len, 1)

def acc_cal(true, pre):
    # 确保输入的形状正确
    if true.shape != pre.shape:
        raise ValueError("The shape of true and pre must be the same.")
    rmse_per_sample = np.sqrt(np.mean((true - pre) ** 2, axis=1))
    acc = (1 - np.mean(rmse_per_sample)) * 100
    return acc


def reverse_normalize(data, scaler_max, scaler_min):
    return data * (scaler_max - scaler_min) + scaler_min


In [4]:
with open("../../../../config/job_config/job_params_unscmol_17.json", "r") as f:
    job_params = json.load(f)
data_dic = joblib.load('../../../../data/user_data/{}/download_data/temp_trainset_{}_{}_{}.joblib'.format(job_params['user_id'], 
                                                                                                         job_params['user_id'], job_params['task_id'],
                                                                                                         job_params['job_number']))

'''超参数'''
train_split = 0.9
inp_len, out_len, step_len = 16, 16, 8
batchsize = 64
epochs = 100
learning_rate = 1e-3

name_list = list(data_dic.keys())
joblib.dump(name_list, 'name_ls.joblib')
for name in name_list:
    origin_data = data_dic[name]
    train_data, test_data = origin_data.iloc[:int(0.7*len(origin_data))], origin_data.iloc[int(0.7*len(origin_data)):]
    data_train_n, train_scale = normalization(train_data.iloc[:,1])
    joblib.dump(train_scale, '../../../../interactive_space/{}/upload_data/scaler_{}_{}_{}_{}.joblib'.format(job_params['user_id'], name, job_params['user_id'], job_params['task_id'], job_params['job_number']))
    data_test_n, test_scale = normalization(test_data.iloc[:,1])
    inp_data_t, out_data_t = create_data(data_train_n, inp_len, out_len, step_len)
    inp_data_v, out_data_v = create_data(data_test_n, inp_len, out_len, step_len)
    # 转化为tensor
    inp_t, out_t = torch.tensor(inp_data_t, dtype=torch.float32), torch.tensor(out_data_t, dtype=torch.float32)
    inp_v, out_v = torch.tensor(inp_data_v, dtype=torch.float32), torch.tensor(out_data_v, dtype=torch.float32)
    train_loader = DataLoader(TensorDataset(inp_t, out_t), shuffle=True, batch_size=batchsize)
    test_loader = DataLoader(TensorDataset(inp_v, out_v), shuffle=False, batch_size=batchsize)
    '''定义模型'''
    
    class User_Model(nn.Module):
        def __init__(self, input_dim, hidden_dim):
            super(User_Model, self).__init__()
            self.hidden_dim = hidden_dim
            self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)
            self.fc_out = nn.Linear(hidden_dim, 16)
        def forward(self, x):
            out, hn = self.gru(x)
            pre_power = self.fc_out(hn)
            return pre_power.permute(1, 0, 2)
        
    GPU_switch = True
    device = torch.device("cuda" if (torch.cuda.is_available() and GPU_switch) else "cpu")
    model = User_Model(1, 128).to(device)
    print('所选模型：{}'.format(model.__class__.__name__))
    print('所选设备：{}'.format(device))
        
    '''训练模型'''
    # 开始训练
    criterion = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    loss_train_step, loss_val_step = [], []
    
    for epoch in range(epochs):
        train_loss_epoch = []
        val_loss_epoch = []
        '''1、训练过程'''
        model.train()
        for batch_idx, (inp_x, yy) in enumerate(train_loader):
            inp_x, yy = inp_x.to(device), yy.to(device)
            '''本地训练1epoch，计算梯度发送给服务器'''
            pred = model(inp_x)
            loss = criterion(pred, yy)
            optimizer.zero_grad()
            loss.backward()
            train_loss_epoch.append(loss.item())  # 添加一个batch的损失
            optimizer.step()
            
        '''2、测试过程'''
        model.eval()
        with torch.no_grad():
            for batch_idx, (inp_x, yy) in enumerate(test_loader):
                inp_x, yy = inp_x.to(device), yy.to(device)
                pred = model(inp_x)
                loss = criterion(pred, yy)
                val_loss_epoch.append(loss.item())
    
        train_loss_epoch = np.mean(np.array(train_loss_epoch))  # 计算1个epoch的误差
        val_loss_epoch = np.mean(np.array(val_loss_epoch))
        loss_train_step.append(train_loss_epoch)
        loss_val_step.append(val_loss_epoch)
    
        if (epoch + 1) % 10 == 0:
            print('Epoch:{}/{}---------------------------'.format(epoch + 1, epochs))
            print('train loss:{:.4f}  ver loss:{:.4f}'.format(train_loss_epoch, val_loss_epoch))
    
    torch.save(model.state_dict(), '../../../../interactive_space/{}/upload_data/model_para_{}_{}_{}_{}.pth'.format(job_params['user_id'], name, job_params['user_id'], job_params['task_id'], job_params['job_number']))

所选模型：User_Model
所选设备：cuda
Epoch:10/100---------------------------
train loss:0.0140  ver loss:0.0143
Epoch:20/100---------------------------
train loss:0.0138  ver loss:0.0137
Epoch:30/100---------------------------
train loss:0.0136  ver loss:0.0134
Epoch:40/100---------------------------
train loss:0.0135  ver loss:0.0134
Epoch:50/100---------------------------
train loss:0.0135  ver loss:0.0134
Epoch:60/100---------------------------
train loss:0.0135  ver loss:0.0135
Epoch:70/100---------------------------
train loss:0.0136  ver loss:0.0137
Epoch:80/100---------------------------
train loss:0.0134  ver loss:0.0137
Epoch:90/100---------------------------
train loss:0.0132  ver loss:0.0136
Epoch:100/100---------------------------
train loss:0.0135  ver loss:0.0148
所选模型：User_Model
所选设备：cuda
Epoch:10/100---------------------------
train loss:0.0151  ver loss:0.0200
Epoch:20/100---------------------------
train loss:0.0146  ver loss:0.0192
Epoch:30/100---------------------------
train l

In [ ]:
'''测试模型'''
data_dic = joblib.load('../../../../data/temp/cached_data/temp_testset_xiaowang_1_0.joblib') # 依照数据说明遍写测试流程，输出归档结果

name = 'cl'
true_data, pre_data = [], []
model.eval()
with torch.no_grad():
    for batch_idx, (inp_x, yy) in enumerate(test_loader):
        inp_x, yy = inp_x.to(device), yy.to(device)
        pred = model(inp_x)
        if yy.shape[0] != 1:
            true_data.append(yy.cpu().numpy().squeeze())
            pre_data.append(pred.cpu().numpy().squeeze())
        else:
            true_data.append(yy.cpu().numpy().reshape(1, -1))
            pre_data.append(pred.cpu().numpy().reshape(1, -1))

true = np.concatenate(true_data, axis=0)
pre = np.concatenate(pre_data, axis=0)

scaler_max, scaler_min = scaler_t.data_max_[0], scaler_t.data_min_[0]


true_original = reverse_normalize(true, scaler_max, scaler_min)
pre_original = reverse_normalize(pre, scaler_max, scaler_min)

pickle.dump([true_original, pre_original], open('../../result/sample_result/{}_single.pkl'.format(name), 'wb'))
pickle.dump([true, pre], open('../../result/sample_result/{}_single_n.pkl'.format(name), 'wb'))
acc = acc_cal(true, pre)
print('{}的预测精度为{}'.format(name, acc))

In [7]:
hyper_params = {}
for name in name_list:
    hyper_params[name] = {'input_dim': 1, 'hidden_dim': 128}
joblib.dump(hyper_params, '../../../../interactive_space/{}/upload_data/model_class_hyperparams_{}_{}_{}.joblib'.format(job_params['user_id'], job_params['user_id'], job_params['task_id'], job_params['job_number']))

['../../../../interactive_space/xiaowang/upload_data/model_class_hyperparams_xiaowang_1_0.joblib']

In [67]:
scaler_max

GRU_Encoder(
  (gru): GRU(1, 128, batch_first=True)
  (fc_out): Linear(in_features=128, out_features=1, bias=True)
)