In [49]:
import pandas as pd
import numpy as np
import os
from keras.models import Sequential
from keras.layers import Dense, Activation,LSTM,Dropout
from keras.optimizers import Adam
import tensorflow as tf



    
def sliceWindow(data,target,step): 
    X,y,temp=[],[],[]
    temp.append(target)
    for i in range(0,len(np.array(data))-2*step,1):  
        end=i+step       
        oneX,oney=np.array(data)[i:end,:],np.array(data[temp])[end:end+step, :]  
        X.append(oneX)  
        y.append(oney)  
    return np.array(X),np.array(y)  



def dataSplit(dataset,target,step,ratio=0.80):  
    datasetX,datasetY=sliceWindow(dataset,target,step)  
    train_size=int(len(datasetX)*ratio)  
    X_train,y_train=datasetX[0:train_size,:],datasetY[0:train_size,:] 
    X_test,y_test=datasetX[train_size:len(datasetX),:],datasetY[train_size:len(datasetX),:]  
    X_train=X_train.reshape(X_train.shape[0],step,-1)  
    X_test=X_test.reshape(X_test.shape[0],step,-1)  
    y_train = y_train.reshape(y_train.shape[0],-1,step)
    y_test = y_test.reshape(y_test.shape[0],-1,step)
    print('X_train.shape: ',X_train.shape)  
    print('X_test.shape: ',X_test.shape)  
    print('y_train.shape: ',y_train.shape)  
    print('y_test.shape: ',y_test.shape)  
    return X_train,X_test,y_train,y_test  


print('============获取数据=================')
path = os.getcwd()
test = pd.read_csv(path+'\Quant_Public_Data_March\预测目标.测试集.csv')
train = pd.read_csv(path+'\Quant_Public_Data_March\预测目标.训练集.csv')
mkt_data = pd.read_csv(path+'\Quant_Public_Data_March\市场数据.训练集.日.csv')
fin_monthly_data = pd.read_csv(path+'\Quant_Public_Data_March\经济数据.训练集.月.csv')
fin_seasonly_data = pd.read_csv(path+'\Quant_Public_Data_March\经济数据.训练集.季.csv')
fin_yearly_data = pd.read_csv(path+'\Quant_Public_Data_March\经济数据.训练数据.年.csv')
fin_est_data = pd.read_csv(path+'\Quant_Public_Data_March\房地产数据.训练集.月.csv')


# 将非日数据转化为日数据
fin_est_data[['biz_date']] = fin_est_data['biz_month']+'-01'
fin_seasonly_data[['biz_date']] = fin_seasonly_data['biz_season']+'-01'
fin_monthly_data[['biz_date']] = fin_monthly_data['biz_month']+'-01'

del fin_est_data['biz_month']
del fin_seasonly_data['biz_season']
del fin_monthly_data['biz_month']



# 所有字段拼接起来
origin_train_data = pd.merge(pd.merge(pd.merge(pd.merge(train,mkt_data,on = ['biz_date'],how='outer'),
                                       fin_est_data,on = ['biz_date'],how='outer'),
                              fin_seasonly_data,on = ['biz_date'],how='outer'),
                     fin_monthly_data,on = ['biz_date'],how='outer')

del origin_train_data['Unnamed: 5']

final_pre = []
target_label = [c for c in origin_train_data.columns if '_10y' in c]

for target in target_label:
    train_data = origin_train_data
    print('========='+target +'===========')

    train_data = train_data[train_data[target].notna()]
    train_data = train_data.fillna(0)
    train_data = train_data.sort_values(by='biz_date') 
    train_data= train_data.set_index('biz_date')
    
    print('============保存'+target+'数据=================')
    
    
    train_data.to_csv(target+'.csv',index=True,header=True) 

    print('============数据分割=================')

    X_train,X_test,y_train,y_test  = dataSplit(train_data,target,step=30)

    # 创建模型

    print('============创建模型=================')
    model = Sequential()

    model.add(LSTM(64, input_shape=(X_train.shape[1],X_train.shape[2]),activation='sigmoid', return_sequences=True, dropout=0.01))
    model.add(LSTM(128, activation='sigmoid', return_sequences=True, dropout=0.01))
    model.add(Dropout(rate=0.01))
    model.add(LSTM(128, activation='sigmoid', dropout=0.01))
    model.add(Dense(30))
    model.compile(loss='mae', optimizer=Adam(lr=0.002, decay=0.01))

    print(model.summary())

    # 训练模型
    print('============训练模型=================')

    tf.config.experimental_run_functions_eagerly(True)
    history = model.fit(X_train, y_train, epochs=50, batch_size=100, validation_data=(X_test, y_test), shuffle=False)
    model.save(target + ".h5")

    print('=============预测数据================')
    pre_train = np.array(train_data.tail(30))
    pre_train = pre_train.reshape(1,30,44)  
    y_pre=model.predict(pre_train,verbose=0)
    print(y_pre)
    final_pre.append(y_pre)

pd.DataFrame(final_pre).to_csv('final_pre.csv',index=False,header=True) 

X_train.shape:  (3814, 30, 44)
X_test.shape:  (954, 30, 44)
y_train.shape:  (3814, 1, 30)
y_test.shape:  (954, 1, 30)
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_12 (LSTM)               (None, 30, 64)            27904     
_________________________________________________________________
lstm_13 (LSTM)               (None, 30, 128)           98816     
_________________________________________________________________
dropout_4 (Dropout)          (None, 30, 128)           0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dense_4 (Dense)              (None, 30)                3870      
Total params: 262,174
Trainable params: 262,174
Non-trainable params: 0
______________________________________________________________



Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[[3.513096  3.5142941 3.514343  3.5143735 3.5151334 3.515776  3.5132782
  3.5153294 3.5158706 3.5163882 3.5169113 3.5152533 3.515257  3.5175238
  3.5189269 3.5148804 3.5132825 3.5139997 3.513206  3.5135252 3.5128067
  3.5112307 3.511518  3.5128229 3.5129488 3.5129685 3.5124998 3.5128512
  3.5128493 3.5141168]]
X_train.shape:  (3811, 30, 44)
X_test.shape:  (953, 30, 44)
y_train.shape:  (3811, 1, 30)
y_test.shape:  (95

Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[[4.031071  4.0315194 4.0321174 4.0326886 4.032132  4.032765  4.0339274
  4.034975  4.0357757 4.0363503 4.0363607 4.0373836 4.0371566 4.0377464
  4.0383544 4.0383263 4.040042  4.040608  4.0411234 4.0411596 4.040986
  4.04148   4.0418873 4.0425196 4.042497  4.042802  4.0432677 4.0432787
  4.0433655 4.0429463]]
X_train.shape:  (2094, 30, 44)
X_test.shape:  (524, 30, 44)
y_train.shape:  (2094, 1, 30)
y_test.shape:  (524, 1, 30)
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param # 

Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[[3.9088159 3.898198  3.8853476 3.9024794 3.8846574 3.9104345 3.9015927
  3.9088993 3.8941846 3.9061675 3.8932173 3.9125257 3.8899252 3.892897
  3.9299946 3.899489  3.8867602 3.9172938 3.9257028 3.9153435 3.9099078
  3.916144  3.9122956 3.898926  3.9177995 3.8976152 3.9020042 3.8943365
  3.8913846 3.9272826]]
X_train.shape:  (2490, 30, 44)
X_test.shape:  (623, 30, 44)
y_train.shape:  (2490, 1, 30)
y_test.shape:  (623, 1, 30)
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_21 (LSTM)               (None, 30, 64)            27904     
_________________________________________________________________
lstm_22 (LSTM)               (Non

Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[[5.2633967 5.263536  5.2634354 5.263477  5.263466  5.2627244 5.2622375
  5.262067  5.2612453 5.261221  5.2612305 5.2613034 5.2611704 5.2610903
  5.2612996 5.2611165 5.26091   5.2609487 5.260955  5.260656  5.26059
  5.2606    5.2604733 5.260376  5.2603726 5.2602916 5.2600665 5.2600937
  5.260064  5.2600746]]
X_train.shape:  (2988, 30, 44)
X_test.shape:  (748, 30, 44)
y_train.shape:  (2988, 1, 30)
y_test.shape:  (748, 1, 30)
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_24 (LSTM)               (None, 30, 64)            27904     
_________________________________________________________________
lstm_25 (LSTM)               (None, 30, 128)           98816     
_________________________________________________________________
dropout_8 (Dropout)          (None, 30, 128)           0         
____

AttributeError: 'list' object has no attribute 'to_csv'

In [50]:
pd.DataFrame(final_pre).to_csv('final_pre.csv',index=False,header=True) 

ValueError: Must pass 2-d input. shape=(5, 1, 30)

In [51]:
final_pre

[array([[3.513096 , 3.5142941, 3.514343 , 3.5143735, 3.5151334, 3.515776 ,
         3.5132782, 3.5153294, 3.5158706, 3.5163882, 3.5169113, 3.5152533,
         3.515257 , 3.5175238, 3.5189269, 3.5148804, 3.5132825, 3.5139997,
         3.513206 , 3.5135252, 3.5128067, 3.5112307, 3.511518 , 3.5128229,
         3.5129488, 3.5129685, 3.5124998, 3.5128512, 3.5128493, 3.5141168]],
       dtype=float32),
 array([[4.031071 , 4.0315194, 4.0321174, 4.0326886, 4.032132 , 4.032765 ,
         4.0339274, 4.034975 , 4.0357757, 4.0363503, 4.0363607, 4.0373836,
         4.0371566, 4.0377464, 4.0383544, 4.0383263, 4.040042 , 4.040608 ,
         4.0411234, 4.0411596, 4.040986 , 4.04148  , 4.0418873, 4.0425196,
         4.042497 , 4.042802 , 4.0432677, 4.0432787, 4.0433655, 4.0429463]],
       dtype=float32),
 array([[3.9088159, 3.898198 , 3.8853476, 3.9024794, 3.8846574, 3.9104345,
         3.9015927, 3.9088993, 3.8941846, 3.9061675, 3.8932173, 3.9125257,
         3.8899252, 3.892897 , 3.9299946, 3.899489