In [None]:
import numpy as np
import pandas as pd
import datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
from Plot_Self_Correlation import plot_auto_corr
from DP_LSTM import *

from matplotlib import pyplot as plt

In [None]:
# Load the raw station-605 records, give the two columns used downstream
# readable names (F10 -> TrafficFlow, F1 -> DateTime) and discard the
# F2 / F5 / F6 columns, all in one non-mutating chain.
Station605_pd = (
    pd.read_csv(r'D:\WORK__wells\PROGRAM_3\Data\station605_pd_pure.csv')
    .rename(columns={'F10': 'TrafficFlow', 'F1': 'DateTime'})
    .drop(['F2', 'F5', 'F6'], axis=1)
)

In [None]:
# Preview the first rows to sanity-check the loaded frame and its column names.
Station605_pd.head()

In [None]:
'''数据处理模块'''
# Normalise the experimental data.
def Raw_Data_Normalized(Raw_df):
    """Min-max scale the non-missing ``TrafficFlow`` values into the range [1, 2].

    Parameters
    ----------
    Raw_df : pandas.DataFrame
        Frame with a ``TrafficFlow`` column. It is copied, never modified.

    Returns
    -------
    (pandas.DataFrame, MinMaxScaler)
        The frame with scaled ``TrafficFlow`` values and the fitted scaler
        (needed later to invert the scaling). Rows whose ``TrafficFlow`` is
        missing are left unscaled and are placed AFTER all non-missing rows,
        so the row order of the result differs from the input whenever
        missing values exist; original index labels are kept.

    Notes
    -----
    The scaler is fitted on the non-missing values only; if there are none,
    ``fit_transform`` receives an empty array and raises.
    """
    Raw_pd = Raw_df.copy()
    missing_mask = Raw_pd['TrafficFlow'].isna()
    Raw_pd_empty = Raw_pd.loc[missing_mask]
    Raw_pd_Nonempty = Raw_pd.loc[~missing_mask].copy()

    MMScaler = MinMaxScaler(feature_range=(1, 2))
    # ``Series.get_values()`` was removed in pandas 1.0; ``to_numpy()`` is the
    # supported replacement. The scaler expects a 2-D (n_samples, 1) array.
    flow_column = Raw_pd_Nonempty['TrafficFlow'].to_numpy(dtype=float).reshape(-1, 1)
    # Flatten the (n, 1) result so it is assigned as an ordinary 1-D column.
    Raw_pd_Nonempty['TrafficFlow'] = np.asarray(MMScaler.fit_transform(flow_column)).ravel()

    Raw_pd = pd.concat([Raw_pd_Nonempty, Raw_pd_empty])
    return Raw_pd, MMScaler

def GetTimeSeriesData(Normalized_5pd, TimeLag):
    """Build overlapping sliding windows over the ``TrafficFlow`` column.

    Parameters
    ----------
    Normalized_5pd : pandas.DataFrame
        Frame with a ``TrafficFlow`` column; rows are taken in positional order.
    TimeLag : int
        Number of lagged observations per sample. Each window holds
        ``TimeLag + 1`` consecutive values (the lags followed by the value
        callers use as the target).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, TimeLag + 1)`` where row ``k`` is
        ``TrafficFlow[k : k + TimeLag + 1]``. When the series is shorter than
        one window the result has shape ``(0, TimeLag + 1)`` so that 2-D
        slicing such as ``result[:, :-1]`` still works.

    Raises
    ------
    ValueError
        If ``TimeLag`` is negative.
    """
    sequence_length = TimeLag + 1
    if sequence_length < 1:
        raise ValueError('TimeLag must be >= 0, got %r' % (TimeLag,))

    flow = np.asarray(Normalized_5pd['TrafficFlow'])
    # A series of length N contains N - L + 1 windows of length L. The previous
    # loop bound (N - L) silently discarded the last window, i.e. the most
    # recent observation was never used.
    n_windows = flow.shape[0] - sequence_length + 1
    if n_windows <= 0:
        return np.empty((0, sequence_length), dtype=flow.dtype)

    # Row k of ``window_index`` is [k, k+1, ..., k+L-1]; one fancy-index lookup
    # replaces the per-window pandas slicing.
    window_index = np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    return flow[window_index]

def RandomEntire(df_t, EmptyProportion):
    """Blank out the trailing ``EmptyProportion`` of the ``TrafficFlow`` column.

    Despite the name nothing is random: the LAST ``round(n * EmptyProportion)``
    rows (in positional order) lose their ``TrafficFlow`` value, while the
    untouched values are kept in a new ``Traffic_True`` column.

    Parameters
    ----------
    df_t : pandas.DataFrame
        Frame with ``DateTime`` and ``TrafficFlow`` columns. Not modified.
    EmptyProportion : float
        Fraction in [0, 1] of trailing rows to blank.

    Returns
    -------
    pandas.DataFrame
        Outer merge on ``DateTime`` of the full data (``Traffic_True``) with the
        truncated data (``TrafficFlow``, NaN where removed), sorted by
        ``DateTime`` with a fresh 0..n-1 index. Any other shared column
        appears twice with pandas' default ``_x`` / ``_y`` suffixes, because
        the merge key is ``DateTime`` only.

    Raises
    ------
    ValueError
        If ``EmptyProportion`` is outside [0, 1].
    """
    if not 0 <= EmptyProportion <= 1:
        raise ValueError('EmptyProportion must be within [0, 1], got %r' % (EmptyProportion,))

    df = df_t.copy()
    df_len = df.shape[0]
    DropNum = int(round(df_len * EmptyProportion))

    # Select the rows to keep by POSITION. Dropping by the integer labels
    # n-1, n-2, ... only worked when the index happened to be 0..n-1 and
    # raised KeyError (or removed the wrong rows) for any other index.
    Save_pd = df.iloc[:df_len - DropNum]

    df = df.rename(columns={'TrafficFlow': 'Traffic_True'})
    Save_pd = pd.merge(df, Save_pd, on=['DateTime'], how='outer')
    Save_pd = Save_pd.sort_values(by=['DateTime']).reset_index(drop=True)
    return Save_pd

# Build records at a coarser time granularity (used to compare different grids).
def Get_HigherScaleRecord(Normalized_605pd,Time_scale):
    """Aggregate every ``Time_scale`` consecutive rows into one coarser record.

    Parameters
    ----------
    Normalized_605pd : pandas.DataFrame
        Frame with ``DateTime``, ``TrafficFlow`` and ``Traffic_True`` columns;
        rows are consumed in positional order.
    Time_scale : int
        Number of consecutive rows merged into one output row (>= 1).

    Returns
    -------
    pandas.DataFrame
        Columns ``DateTime`` (timestamp of the FIRST row of each group),
        ``TrafficFlow`` and ``Traffic_True`` (sums over the group, NaN values
        skipped), with a 0..k-1 index. A trailing incomplete group is dropped.
        Every value equal to 0 is replaced by NaN, so a group whose values are
        all missing (sum 0) is reported as missing - note that a genuine
        zero sum is turned into NaN as well.

    Raises
    ------
    ValueError
        If ``Time_scale`` is smaller than 1.
    """
    if Time_scale < 1:
        raise ValueError('Time_scale must be >= 1, got %r' % (Time_scale,))

    columns = ['DateTime', 'TrafficFlow', 'Traffic_True']
    n_rows = Normalized_605pd.shape[0]

    # Collect plain records and build the frame once; growing a DataFrame with
    # pd.concat inside the loop is quadratic. Positional ``iloc`` slicing also
    # removes the dependency on a default 0..n-1 index, and no call to the
    # removed ``Series.get_values()`` is needed.
    records = []
    for start in range(0, n_rows - Time_scale + 1, Time_scale):
        group = Normalized_605pd.iloc[start:start + Time_scale]
        records.append({
            'DateTime': group['DateTime'].iloc[0],
            'TrafficFlow': group['TrafficFlow'].sum(),
            'Traffic_True': group['Traffic_True'].sum(),
        })

    new_grid_pd = pd.DataFrame(records, columns=columns)
    new_grid_pd = new_grid_pd.replace(0, np.nan)
    return new_grid_pd

In [None]:
def PrePareLSTM(Station5_pd, TimeLag, params):
    """Pre-train the base LSTM that is later fine-tuned for other granularities.

    Parameters
    ----------
    Station5_pd : pandas.DataFrame
        Raw records with a ``TrafficFlow`` column.
    TimeLag : int
        Number of lagged observations used as model input.
    params : dict
        Must provide ``'lr'``, ``'nb_hidden_cell'``, ``'nb_layers'``,
        ``'epochs'`` and ``'batch_size'``.

    Returns
    -------
    The model object returned by ``model_fit`` (imported from ``DP_LSTM``).
    """
    print(r'预训练模型')
    # The fitted scaler is not needed for pre-training, only the scaled frame.
    Normalized_5pd, MM5Scaler = Raw_Data_Normalized(Station5_pd)
    TimeSeries5Data = GetTimeSeriesData(Normalized_5pd, TimeLag)

    # Every window is [lag_1 ... lag_TimeLag, target]: split inputs from target.
    PreTrain_X = TimeSeries5Data[:, :-1]
    PreTrain_Y = TimeSeries5Data[:, -1]

    # Reshape to (samples, 1, TimeLag) and (samples, 1, 1) for the model.
    PreTrain_X = np.reshape(PreTrain_X, (PreTrain_X.shape[0], 1, PreTrain_X.shape[1]))
    PreTrain_Y = np.reshape(PreTrain_Y, (PreTrain_Y.shape[0], 1, 1))

    TransferModel = build_LSTM_Model(inputDim=PreTrain_X.shape[2], lr=params['lr'],
                                     nb_hidden_cell=params['nb_hidden_cell'],
                                     nb_layers=params['nb_layers'])
    # Raw string: the previous plain literal relied on the invalid escape
    # sequences "\W", "\P" and "\M", which newer Python versions warn about
    # (and plan to reject). The resulting path text is unchanged.
    PrepairedModel = model_fit(TransferModel, PreTrain_X, PreTrain_Y,
                               validation_split=0.2,
                               saveFile=r'D:\WORK__wells\PROGRAM_3\Model pic',
                               epochs=params['epochs'], batch_size=params['batch_size'])
    print(r'预训练模型学习完毕！')
    return PrepairedModel

'''
迁移学习微调实验模型

'''

def FineTune(Prepared_model, Train_X, Train_Y,params):
    """Fine-tune a COPY of a pre-trained model on new training data.

    The last layer of the pre-trained network is replaced by a fresh
    ``Dense(1, activation='linear')`` head, the first ``params['frozenlayer']``
    remaining layers are frozen, and the result is trained via ``model_fit``.

    Parameters
    ----------
    Prepared_model : keras Sequential model
        Pre-trained base model. It is left untouched, so the same base model
        can be fine-tuned repeatedly (e.g. once per granularity).
    Train_X : numpy.ndarray, shape (samples, TimeLag)
    Train_Y : numpy.ndarray, shape (samples,)
    params : dict
        Must provide ``'frozenlayer'``, ``'TransferEpochs'`` and
        ``'TransferBatch_size'``.

    Returns
    -------
    The model object returned by ``model_fit`` (imported from ``DP_LSTM``).
    """
    from keras.layers import Dense
    from keras.models import clone_model

    # Previously ``model_new = Prepared_model`` merely aliased the input:
    # pop()/add()/trainable changes were applied to the caller's base model
    # (so every later fine-tuning run started from an already modified
    # network) and the weight-copy loop copied each layer onto itself.
    # Clone the architecture and copy the weights to get an independent model.
    model_new = clone_model(Prepared_model)
    model_new.set_weights(Prepared_model.get_weights())

    # Swap the output head and freeze the leading layers.
    model_new.pop()
    for layer_index, layer in enumerate(model_new.layers):
        if layer_index < params['frozenlayer']:
            layer.trainable = False
    model_new.add(Dense(1, activation='linear'))

    # Keras only honours changed ``trainable`` flags and a new head after the
    # model is compiled again. Re-use the base model's loss and a fresh
    # optimizer with the same configuration when the base model was compiled.
    # NOTE(review): whether ``model_fit`` compiles the model itself is not
    # visible from this notebook - confirm against DP_LSTM; if it does, this
    # compile step is simply superseded.
    source_optimizer = getattr(Prepared_model, 'optimizer', None)
    if source_optimizer is not None:
        model_new.compile(
            optimizer=type(source_optimizer).from_config(source_optimizer.get_config()),
            loss=Prepared_model.loss,
        )

    # Reshape to (samples, 1, TimeLag) and (samples, 1, 1) for the model.
    ReTrain_X = np.reshape(Train_X, (Train_X.shape[0], 1, Train_X.shape[1]))
    ReTrain_Y = np.reshape(Train_Y, (Train_Y.shape[0], 1, 1))
    print('微调迁移学习模型')
    return model_fit(model_new, ReTrain_X,
                     ReTrain_Y, epochs=params['TransferEpochs'],
                     batch_size=params['TransferBatch_size'])




In [None]:
# 在10分钟 30分钟 一个小时 一天的范围内 比较直接预测模型与基于迁移学习的模型（基模型为5分钟粒度）的性能
# 构造

'''
输入：
1、 Station605_pd
2、时滞 TimeLag
3、要构造的粒度 列表 GridList
3、params

输出：不同粒度下直接LSTM与迁移学习LSTM性能的比较
'''
def TransferLearning(Station605_pd,TimeLag,GridList,params,FrozenLayers=3):
    """Compare a directly trained LSTM with a fine-tuned (transfer) LSTM per granularity.

    For every aggregation factor in ``GridList`` the data is aggregated,
    normalised and windowed; a fresh LSTM is trained directly while the
    pre-trained base model is fine-tuned, and both are evaluated on the same
    hold-out split. Metrics and full-series predictions are written to CSV
    files under ``D:\\WORK__wells\\PROGRAM_3\\Result\\result``.

    Parameters
    ----------
    Station605_pd : pandas.DataFrame
        Raw records with ``DateTime`` and ``TrafficFlow`` columns.
    TimeLag : int
        Number of lagged observations used as model input.
    GridList : iterable of int
        Aggregation factors (number of base records merged into one).
    params : dict
        Hyper-parameters: ``'lr'``, ``'nb_hidden_cell'``, ``'nb_layers'``,
        ``'epochs'``, ``'batch_size'``, ``'TransferEpochs'``,
        ``'TransferBatch_size'``; optional ``'frozenlayer'`` and ``'seed'``.
    FrozenLayers : int, default 3
        Number of frozen layers used when ``params`` has no ``'frozenlayer'``
        entry (an explicit entry in ``params`` wins).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame or None)
        One metrics row per grid (0..k-1 index), and the normalised frame of
        the LAST processed grid (``None`` when ``GridList`` is empty).
    """
    # Pre-train the base model used for transfer learning.
    PrepairedModel = PrePareLSTM(Station605_pd, TimeLag, params)

    # ``FrozenLayers`` used to be ignored entirely; it now acts as the
    # fallback when the caller's params do not specify 'frozenlayer'.
    finetune_params = dict(params)
    finetune_params.setdefault('frozenlayer', FrozenLayers)

    result_columns = ['Grid','Direct_RMSE','Direct_MAE','Direct_MAPE','RMSE','MAE','MAPE']
    result_rows = []
    result_pd = pd.DataFrame(columns=result_columns)
    Normalized_605pd = None  # stays None when GridList is empty

    # Proportion 0 removes nothing; the call only adds the Traffic_True column.
    Empty_pd = RandomEntire(Station605_pd,0)
    for EachGrid in GridList:
        print(r'准备数据')
        HighGrid_pd = Get_HigherScaleRecord(Empty_pd, EachGrid)
        Normalized_605pd,MM5Scaler = Raw_Data_Normalized(HighGrid_pd)
        TimeSeries605Data = GetTimeSeriesData(Normalized_605pd, TimeLag)
        Tmp_pd = pd.DataFrame(TimeSeries605Data)

        # Random 80/20 split; an optional params['seed'] makes it reproducible
        # (None keeps the previous unseeded behaviour).
        Train_pd = Tmp_pd.sample(frac=0.8, replace=False, random_state=params.get('seed'))
        Test_pd = Tmp_pd.drop(Train_pd.index,axis=0)

        # ``DataFrame.get_values()`` was removed in pandas 1.0 -> ``to_numpy()``.
        train_values = Train_pd.to_numpy()
        test_values = Test_pd.to_numpy()
        Train_X = train_values[:,:-1]
        Train_Y = train_values[:,-1]
        Validation_X = test_values[:,:-1]
        Validation_Y = test_values[:,-1]

        # Fine-tune the pre-trained model (expects the 2-D arrays).
        TranseferedModel = FineTune(PrepairedModel, Train_X, Train_Y, finetune_params)

        # Direct training: reshape to (samples, 1, TimeLag) / (samples, 1, 1).
        Train_X = np.reshape(Train_X, (Train_X.shape[0], 1,Train_X.shape[1]))
        Train_Y = np.reshape(Train_Y, (Train_Y.shape[0], 1,1))
        Validation_X = np.reshape(Validation_X, (Validation_X.shape[0], 1,Validation_X.shape[1]))
        Validation_Y = np.reshape(Validation_Y, (Validation_Y.shape[0], 1,1))
        print(r'直接预测')
        Model = build_LSTM_Model(inputDim = Train_X.shape[2],lr=params['lr'],nb_hidden_cell=params['nb_hidden_cell'],nb_layers= params['nb_layers'])
        fit_model = model_fit(Model,Train_X, Train_Y, validation_split=0.1, epochs=params['epochs'], batch_size=params['batch_size'])

        Direct_RMSE,Direct_MAE,Direct_MAPE,Predict_y,True_Y = model_predict(fit_model,Validation_X,Validation_Y,MM5Scaler)
        print(r'迁移学习预测')
        RMSE,MAE,MAPE,Predict_y,True_Y = model_predict(TranseferedModel,Validation_X,Validation_Y,MM5Scaler)

        # Build the result table from plain records instead of concatenating a
        # reused one-row frame; the CSV is rewritten after every grid so that
        # partial results survive an interrupted run.
        result_rows.append({'Grid': EachGrid,
                            'Direct_RMSE': Direct_RMSE,
                            'Direct_MAE': Direct_MAE,
                            'Direct_MAPE': Direct_MAPE,
                            'RMSE': RMSE,
                            'MAE': MAE,
                            'MAPE': MAPE})
        result_pd = pd.DataFrame(result_rows, columns=result_columns)
        result_pd.to_csv(r'D:\WORK__wells\PROGRAM_3\Result\result\ResultGrid.csv',index=None)

        # Predictions of both models over the whole series (for plotting).
        print(r'整体图')
        Entire_X = TimeSeries605Data[:,:-1]
        Entire_Y = TimeSeries605Data[:,-1]
        Entire_X = np.reshape(Entire_X, (Entire_X.shape[0], 1,Entire_X.shape[1]))
        Entire_Y = np.reshape(Entire_Y, (Entire_Y.shape[0], 1,1))
        RMSE,MAE,MAPE,Predict_y,True_Y = model_predict(fit_model,Entire_X,Entire_Y,MM5Scaler)
        RMSE,MAE,MAPE,TransferPredict_y,True_Y = model_predict(TranseferedModel,Entire_X,Entire_Y,MM5Scaler)

        # Flatten every series to 1-D so they fit into DataFrame columns.
        resultDic = {
            'True_Y': True_Y.reshape(True_Y.shape[0]),
            'DirectPredict': Predict_y.reshape(Predict_y.shape[0]),
            'TrnsferPredict': TransferPredict_y.reshape(TransferPredict_y.shape[0]),
        }

        ForPlot_pd = pd.DataFrame(resultDic)
        ForPlot_pd.to_csv(r'D:\WORK__wells\PROGRAM_3\Result\result\ForPlot'+str(EachGrid)+'.csv',index=None)
        Normalized_605pd.to_csv(r'D:\WORK__wells\PROGRAM_3\Result\result\日期数据'+str(EachGrid)+'.csv',index=None)
    return result_pd,Normalized_605pd

In [None]:
# Hyper-parameters for the granularity comparison run.
params = dict(
    lr=0.001,
    nb_hidden_cell=[80, 80],
    nb_layers=2,
    epochs=10,
    batch_size=30,
    TransferEpochs=20,
    TransferBatch_size=2,
    frozenlayer=3,
)

# Number of lagged observations fed to the model.
TimeLag = 5
# Aggregation factors to evaluate; the full experiment list is
# [2, 6, 12, 36, 288], only the first one is enabled here.
GridList = [2]
result_pd, Normalized_605pd = TransferLearning(
    Station605_pd,
    TimeLag,
    GridList,
    params,
)

In [None]:
# Prediction quality of the directly trained LSTM for different time lags.
def TimeLagPlot(Station605_pd,TimeLags,params):
    """Train and evaluate a direct LSTM once per time lag.

    Parameters
    ----------
    Station605_pd : pandas.DataFrame
        Raw records with ``DateTime`` and ``TrafficFlow`` columns.
    TimeLags : iterable of int
        Time lags (number of lagged inputs) to evaluate.
    params : dict
        Hyper-parameters: ``'lr'``, ``'nb_hidden_cell'``, ``'nb_layers'``,
        ``'epochs'``, ``'batch_size'``; optional ``'seed'`` for the split.

    Returns
    -------
    pandas.DataFrame
        One row per lag with columns ``TimeLags``, ``Direct_RMSE``,
        ``Direct_MAE`` and ``Direct_MAPE`` (0..k-1 index). The same table is
        rewritten to a CSV file after every lag.
    """
    result_columns = ['TimeLags','Direct_RMSE','Direct_MAE','Direct_MAPE']
    result_rows = []
    result_pd = pd.DataFrame(columns=result_columns)

    # Proportion 0 removes nothing; the call only adds the Traffic_True column.
    Empty_pd = RandomEntire(Station605_pd,0)
    # Normalisation does not depend on the lag, so it is done once.
    Normalized_605pd,MM5Scaler = Raw_Data_Normalized(Empty_pd)
    for EachLag in TimeLags:
        print(r'准备数据')
        TimeSeries605Data = GetTimeSeriesData(Normalized_605pd, EachLag)
        Tmp_pd = pd.DataFrame(TimeSeries605Data)

        # Random 80/20 split; an optional params['seed'] makes it reproducible
        # (None keeps the previous unseeded behaviour).
        Train_pd = Tmp_pd.sample(frac=0.8, replace=False, random_state=params.get('seed'))
        Test_pd = Tmp_pd.drop(Train_pd.index,axis=0)

        # ``DataFrame.get_values()`` was removed in pandas 1.0 -> ``to_numpy()``.
        train_values = Train_pd.to_numpy()
        test_values = Test_pd.to_numpy()
        Train_X = train_values[:,:-1]
        Train_Y = train_values[:,-1]
        Validation_X = test_values[:,:-1]
        Validation_Y = test_values[:,-1]

        # Reshape to (samples, 1, lag) / (samples, 1, 1) for the model.
        Train_X = np.reshape(Train_X, (Train_X.shape[0], 1,Train_X.shape[1]))
        Train_Y = np.reshape(Train_Y, (Train_Y.shape[0], 1,1))
        Validation_X = np.reshape(Validation_X, (Validation_X.shape[0], 1,Validation_X.shape[1]))
        Validation_Y = np.reshape(Validation_Y, (Validation_Y.shape[0], 1,1))
        print(r'直接预测')
        Model = build_LSTM_Model(inputDim = Train_X.shape[2],lr=params['lr'],nb_hidden_cell=params['nb_hidden_cell'],nb_layers= params['nb_layers'])
        fit_model = model_fit(Model,Train_X, Train_Y, validation_split=0.1, epochs=params['epochs'], batch_size=params['batch_size'])

        RMSE,MAE,MAPE,Predict_y,True_Y = model_predict(fit_model,Validation_X,Validation_Y,MM5Scaler)

        # Build the table from plain records instead of concatenating a reused
        # one-row frame; the CSV is rewritten after every lag so that partial
        # results survive an interrupted run.
        result_rows.append({'TimeLags': EachLag,
                            'Direct_RMSE': RMSE,
                            'Direct_MAE': MAE,
                            'Direct_MAPE': MAPE})
        result_pd = pd.DataFrame(result_rows, columns=result_columns)
        result_pd.to_csv(r'D:\WORK__wells\PROGRAM_3\Result\result\不同时滞.csv',index=None)

    return result_pd

In [None]:
# Hyper-parameters for the time-lag study (six stacked layers of 80 cells).
# Note: this rebinds the notebook-level ``params`` defined in an earlier cell.
params = dict(
    lr=0.001,
    nb_hidden_cell=[80] * 6,
    nb_layers=6,
    epochs=20,
    batch_size=100,
    TransferEpochs=20,
    TransferBatch_size=30,
    frozenlayer=3,
)

# Evaluate the 34 consecutive lags 25, 26, ..., 58.
TimeLags = list(range(25, 25 + 34))
result_pd = TimeLagPlot(Station605_pd, TimeLags, params)