In [79]:
import pandas_datareader.data as web
import datetime

# Download GOOGL price history from the Stooq data source for the requested
# date range. The source may return fewer dates than requested (the recorded
# output starts in 2004-08 although the range starts in 2000).
start = datetime.datetime(year=2000, month=1, day=1)
end = datetime.datetime(year=2021, month=8, day=11)
df = web.DataReader('GOOGL', data_source='stooq', start=start, end=end)

In [80]:
pre_days = 10  # forecast horizon: the target is the close `pre_days` rows ahead

# Rebuild the frame from the price columns only. Dropping a 'label' column left
# over from a previous run keeps this cell idempotent: otherwise dropna() would
# see the NaN tail of the old label and delete the last `pre_days` rows on every
# re-execution. sort_index() puts the rows in chronological order.
df = df.drop(columns=['label'], errors='ignore').dropna().sort_index()

# Shift the close up by `pre_days` rows so each row carries the future close as
# its target; the final `pre_days` rows have no future value and stay NaN.
df['label'] = df['Close'].shift(-pre_days)
print(df)

                 Open      High        Low      Close       Volume    label
Date                                                                       
2004-08-19    2.50000    2.6015    2.39900    2.50850  894076000.0  2.53775
2004-08-20    2.52525    2.7270    2.51250    2.70775  457144000.0  2.50025
2004-08-23    2.76875    2.8370    2.72625    2.73500  365488000.0  2.53950
2004-08-24    2.78100    2.7900    2.58925    2.62175  305252000.0  2.55750
2004-08-25    2.62400    2.7000    2.59700    2.65000  183956000.0  2.55775
...               ...       ...        ...        ...          ...      ...
2021-08-05  135.68000  136.3500  134.84600  136.25200   17869000.0      NaN
2021-08-06  136.04200  136.4720  135.21000  135.73800   20488120.0      NaN
2021-08-09  135.94200  137.1840  135.33900  136.91300   17766000.0      NaN
2021-08-10  137.34200  137.7720  136.36400  136.80700   19414080.0      NaN
2021-08-11  137.18200  137.6820  136.27200  136.27900   15352000.0      NaN

[4274 rows 

In [81]:
from sklearn.preprocessing import StandardScaler

# Standardise every column except the trailing 'label' (zero mean, unit variance
# per column). The result is a plain 2-D array with one row per trading day.
# NOTE(review): the scaler is fitted on all rows, including those that later end
# up in the validation split, so validation statistics leak into the features.
# Consider fitting on the training portion only.
scaler = StandardScaler()
features = df.iloc[:, :-1]
sca_X = scaler.fit_transform(features)
print(sca_X)

[[-1.10502268 -1.10212588 -1.10698705 -1.10384759  4.89034655]
 [-1.10403672 -1.0972712  -1.10251527 -1.09607465  2.08674174]
 [-1.09452862 -1.0930161  -1.09409374 -1.0950116   1.49862459]
 ...
 [ 4.10557699  4.10389003  4.13070928  4.13940934 -0.73255801]
 [ 4.16024373  4.12663547  4.1710932   4.13527417 -0.72198299]
 [ 4.1539961   4.12315402  4.1674685   4.11467635 -0.74804761]]


In [82]:
from collections import deque

mem_his_days = 10  # look-back window: each sample holds 10 consecutive rows

# A bounded deque acts as the sliding window: once it is full, every append
# evicts the oldest row, so each snapshot taken below contains the most recent
# `mem_his_days` rows.
deq = deque(maxlen=mem_his_days)

X = []
for row in sca_X:
    deq.append(list(row))
    if len(deq) < mem_his_days:
        continue  # still warming up: not enough history for a full window
    X.append(list(deq))

# The final `pre_days` windows have no label (the shifted close is NaN there),
# so set them aside for forecasting and keep the rest for training.
X, X_lately = X[:-pre_days], X[-pre_days:]
print(len(X))
print(len(X_lately))

# The first window ends on row `mem_his_days - 1`, so the labels start at that
# row; the trailing `pre_days` NaN labels are cut off to match X.
y = df['label'].iloc[mem_his_days - 1:-pre_days].values
print(len(y))

4255
10
4255


In [85]:
# Display the windowed feature data for a visual sanity check.
# NOTE(review): the recorded output is a NumPy array, yet in file order X is
# still a nested Python list at this point (the np.array conversion is in the
# following cell). This cell was executed out of order; on a fresh top-to-bottom
# run it would print the full nested list instead.
X

array([[[-1.10502268, -1.10212588, -1.10698705, -1.10384759,
          4.89034655],
        [-1.10403672, -1.0972712 , -1.10251527, -1.09607465,
          2.08674174],
        [-1.09452862, -1.0930161 , -1.09409374, -1.0950116 ,
          1.49862459],
        ...,
        [-1.09986838, -1.10074297, -1.10102796, -1.10221888,
         -0.17897484],
        [-1.10277744, -1.10246435, -1.10088021, -1.10186778,
         -0.21480496],
        [-1.10238696, -1.10317998, -1.10333279, -1.10393537,
          0.32734388]],

       [[-1.10403672, -1.0972712 , -1.10251527, -1.09607465,
          2.08674174],
        [-1.09452862, -1.0930161 , -1.09409374, -1.0950116 ,
          1.49862459],
        [-1.09405028, -1.09483419, -1.0994914 , -1.09942959,
          1.11211607],
        ...,
        [-1.10277744, -1.10246435, -1.10088021, -1.10186778,
         -0.21480496],
        [-1.10238696, -1.10317998, -1.10333279, -1.10393537,
          0.32734388],
        [-1.10581339, -1.10376022, -1.10405183, 

In [84]:
import numpy as np

# Convert the nested window list and the label vector into NumPy arrays and
# report their shapes; X comes out 3-D (samples, window length, features) and
# y 1-D, with matching first dimensions.
X, y = np.array(X), np.array(y)
print(X.shape, y.shape, sep='\n')

(4255, 10, 5)
(4255,)


In [6]:
from sklearn.model_selection import train_test_split

# Hold out the most recent 10% of windows for validation, keeping time order.
# shuffle=False matters here: consecutive windows overlap in all but one row,
# so a random split would put near-duplicates of every validation sample into
# the training set and make the validation loss look better than it really is.
# It also makes the split reproducible without a random seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)

构建 LSTM 模型

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Stacked-LSTM regressor with tanh activations: three 10-unit LSTM layers, a
# 10-unit dense layer and a single linear output unit. Each hidden layer is
# followed by Dropout(0.1), which randomly zeroes 10% of that layer's outputs
# during training to reduce overfitting.
model = Sequential([
    # The first two LSTM layers return the full sequence so the next LSTM
    # receives one vector per time step.
    LSTM(10, input_shape=X.shape[1:], activation='tanh',
         recurrent_activation='sigmoid', return_sequences=True),
    Dropout(0.1),
    LSTM(10, activation='tanh', recurrent_activation='sigmoid',
         return_sequences=True),
    Dropout(0.1),
    # The last LSTM layer returns only its final state, collapsing the window
    # to one vector.
    LSTM(10, activation='tanh', recurrent_activation='sigmoid'),
    Dropout(0.1),
    # Fully connected head.
    Dense(10, activation='tanh'),
    Dropout(0.1),
    # Output layer: one unit, no activation, i.e. the predicted price.
    Dense(1),
])

# Optimise mean squared error with Adam; also report mean absolute percentage
# error as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mape'])

# Train for 50 epochs, evaluating on the held-out split after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1e9207b5670>

In [68]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Variant of the stacked-LSTM regressor that uses ReLU activations: three
# 10-unit LSTM layers, a 10-unit dense layer and a single linear output unit.
# Each hidden layer is followed by Dropout(0.1), which randomly zeroes 10% of
# that layer's outputs during training to reduce overfitting.
# NOTE(review): this rebinds `model`, so the previously trained network is no
# longer reachable under that name.
model = Sequential([
    # The first two LSTM layers return the full sequence so the next LSTM
    # receives one vector per time step.
    LSTM(10, input_shape=X.shape[1:], activation='relu', return_sequences=True),
    Dropout(0.1),
    LSTM(10, activation='relu', return_sequences=True),
    Dropout(0.1),
    # The last LSTM layer returns only its final state, collapsing the window
    # to one vector.
    LSTM(10, activation='relu'),
    Dropout(0.1),
    # Fully connected head.
    Dense(10, activation='relu'),
    Dropout(0.1),
    # Output layer: one unit, no activation, i.e. the predicted price.
    Dense(1),
])

# Optimise mean squared error with Adam; also report mean absolute percentage
# error as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mape'])

# Train for 50 epochs, evaluating on the held-out split after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)

Epoch 1/50


2022-11-25 16:38:54.942341: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-25 16:39:10.090685: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2963aedf0>