In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from models import TimeSeriesModel
from tools import create_sequence

In [None]:
# Load the engineered stock features.
# `parse_dates=True` was dropped: without an `index_col` it only targets the
# index and parses nothing, so the date column is converted explicitly instead.
# The sequence building and TimeSeriesSplit cells depend on row order, so the
# frame is put in chronological order here; the stable sort keeps rows that
# share a date in their original file order.
df = (
    pd.read_csv('stock_features.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date', kind='stable')
    .reset_index(drop=True)
)
target = 'close'
# Every column except the date and the target is used as a model input.
features = df.drop(columns=['date', target]).values
X = features.copy()
# Reshape the target to a single column (n_rows, 1).
y = df[target].copy().values.reshape(-1, 1)

print(X.shape, y.shape)


In [None]:
# The cross-validation cell uses TimeSeriesSplit(n_splits=5, test_size=30), so
# the last 5 * 30 samples end up in some validation fold. Keep this number in
# sync with that cell.
cv_holdout_rows = 5 * 30
scaler_fit_rows = len(X) - cv_holdout_rows
if scaler_fit_rows <= 0:
    raise ValueError(
        f"Need more than {cv_holdout_rows} rows to keep the validation folds "
        f"out of the scaler fit, got {len(X)}."
    )

# Fit the scalers only on rows that precede every validation window. Fitting on
# the full dataset would leak the min/max of the validation period into training.
# NOTE(review): assumes create_sequence aligns the last sample with the last
# row, so that the last `cv_holdout_rows` rows are the validation targets — TODO confirm.
scaler_X = MinMaxScaler().fit(X[:scaler_fit_rows])
scaler_y = MinMaxScaler().fit(y[:scaler_fit_rows])

# Later rows are transformed with the same statistics, so their scaled values
# may fall outside [0, 1]; that is expected.
X_train_scaled = scaler_X.transform(X)
y_train_scaled = scaler_y.transform(y)

# time_steps=60 is the same value as config["input_window"].
X_train_scaled, y_train_scaled = create_sequence(X_train_scaled, y_train_scaled, time_steps=60)
print(X_train_scaled.shape, y_train_scaled.shape)

In [None]:
# Hyperparameters handed to TimeSeriesModel(config=...). How each key is
# interpreted is decided by the project's `models` module.
config = dict(
    model_name="LSTM",
    # Same value as the time_steps=60 used when the sequences are built.
    input_window=60,
    label_window=1,
    # Architecture-related settings.
    units=32,
    lstm_layers=2,
    dense_layers=1,
    # Regularisation-related settings.
    dropout_rate=0.3,
    l2_rate=0.01,
    # Optimisation-related settings.
    learning_rate=0.001,
    batch_size=128,
    epochs=20,
)

In [None]:
# Cross-validation over the sequence samples: 5 folds, each validating on a
# block of 30 consecutive samples.
# NOTE(review): scaler_y is fit outside this loop; make sure it was not fit on
# rows that fall inside any validation fold, otherwise the scores are optimistic.
tscv = TimeSeriesSplit(n_splits=5, test_size=30)
fold_metrics = []  # one dict of metrics per fold, summarised after the loop
for fold, (train_index, val_index) in enumerate(tscv.split(X_train_scaled)):
    print(f"Fold {fold + 1}:")
    X_train_fold, X_val_fold = X_train_scaled[train_index], X_train_scaled[val_index]
    y_train_fold, y_val_fold = y_train_scaled[train_index], y_train_scaled[val_index]
    # Build the model; a new TimeSeriesModel is created for every fold.
    tsm = TimeSeriesModel(config=config)
    model = tsm.build_model(input_shape=(X_train_fold.shape[1], X_train_fold.shape[2]))
    # Train the model, passing this fold's validation data.
    history = tsm.fit(X_train_fold, y_train_fold, (X_val_fold, y_val_fold))
    val_predictions = tsm.predict(X_val_fold)
    # Undo the scaling of the predictions. They are forced into a single column
    # first, because scaler_y was fit on a one-column target.
    val_predictions_prices = scaler_y.inverse_transform(
        np.asarray(val_predictions).reshape(-1, 1)
    )
    # Undo the scaling of the ground-truth values.
    val_real_prices = scaler_y.inverse_transform(y_val_fold.reshape(-1, 1))

    mse = mean_squared_error(val_real_prices, val_predictions_prices)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(val_real_prices, val_predictions_prices)
    r2 = r2_score(val_real_prices, val_predictions_prices)
    print(f"Fold {fold + 1} for window - \nMSE: {mse} \nRMSE: {rmse}  \nMAE: {mae} \nR2: {r2}")
    fold_metrics.append({"fold": fold + 1, "mse": mse, "rmse": rmse, "mae": mae, "r2": r2})

    # Plot predicted vs. real prices for this fold's validation window. The
    # labels describe what is drawn: prices over validation steps, not an
    # accuracy curve over epochs.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(val_predictions_prices[-100:], label='predicted_prices')
    ax.plot(val_real_prices[-100:], label='real_prices')
    ax.set_title(f'Fold {fold + 1}: Predicted vs. Real Prices')
    ax.set_ylabel('Price')
    ax.set_xlabel('Validation time step')
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure so figures do not accumulate across folds.
    plt.close(fig)
    tsm.plot_history(history=history, fold=fold)

# Summarise the cross-validation: mean and standard deviation of each metric.
cv_summary = pd.DataFrame(fold_metrics).set_index("fold")
print(cv_summary.agg(["mean", "std"]))