<a href="https://colab.research.google.com/github/wannasmile/colab_code_note/blob/main/QUANT024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install darts



In [2]:
import pandas as pd

# 下载数据
def download_stock_data():
    # 定义需要处理的股票代码列表
    stocks = ['000001.SS', 'AAPL', 'BTC-USD', 'DJI', 'GSPC', 'IXIC']

    for stock in stocks:
        try:
            filename = f'https://raw.githubusercontent.com/wannasmile/colab_code_note/refs/heads/main/{stock}.csv'
            df = pd.read_csv(filename,
                           delimiter=',',
                           usecols=['Date','Open','High','Low','Close','Adj Close','Volume'])
            df = df.sort_values('Date')
            print(f"{'#' * 50}\n{stock}")
            print(df.head(2))
            print('#' * 50)
            df.to_csv(f'{stock}.csv', index=False)
        except Exception as e:
            print(f"Error downloading {stock}: {str(e)}")

download_stock_data()

##################################################
000001.SS
         Date         Open         High          Low        Close  \
0  2011-07-01  2767.833008  2778.667969  2752.966064  2759.362061   
1  2011-07-04  2770.939941  2813.270020  2770.939941  2812.818115   

     Adj Close  Volume  
0  2759.362061   92000  
1  2812.818115  122000  
##################################################
##################################################
AAPL
         Date       Open       High        Low      Close  Adj Close  \
0  2011-07-01  11.998214  12.267857  11.935714  12.259286  10.391948   
1  2011-07-05  12.250000  12.493929  12.232143  12.479643  10.578743   

      Volume  
0  435313200  
1  355054000  
##################################################
##################################################
BTC-USD
         Date        Open        High         Low       Close   Adj Close  \
0  2014-09-17  465.864014  468.174011  452.421997  457.334015  457.334015   
1  2014-09-18  456.8599

In [3]:
from darts.dataprocessing.transformers import Scaler
from darts import TimeSeries
from darts.utils.timeseries_generation import gaussian_timeseries, linear_timeseries, sine_timeseries
from darts.models import RNNModel, TCNModel, TransformerModel, NBEATSModel, BlockRNNModel
from darts.metrics import mape, smape
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (20,15)
import pandas as pd

In [4]:
import warnings
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, date, timedelta
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse, mae, mse
from darts.models import NBEATSModel

warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)

# 读取股票数据
data0 = pd.read_csv("000001.SS.csv")
print(f"原始数据形状: {data0.shape}")
print(f"原始数据前5行:\n{data0.head()}")

# 数据预处理 - 提取所需列
# 方式1: 仅使用Close作为预测目标
data_close = data0[['Date', 'Close']].copy()

# 方式2: 使用所有基本特征
data_all_features = data0[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].copy()

# 1. 将日期列转换为日期类型
data_close['Date'] = pd.to_datetime(data_close['Date'])
data_all_features['Date'] = pd.to_datetime(data_all_features['Date'])

# 2. 仅使用最近的501个交易日的数据
data_close = data_close[-501:].reset_index(drop=True)
data_all_features = data_all_features[-501:].reset_index(drop=True)

# 3. 创建连续日期序列并进行填充，处理非交易日
start_date = data_close['Date'].min().date()
end_date = data_close['Date'].max().date()
dates_range = []

for i in range((end_date - start_date).days + 1):
    dates_range.append((start_date + timedelta(i)).strftime('%Y-%m-%d'))

continuous_dates = pd.DataFrame({'Date': dates_range})
continuous_dates['Date'] = pd.to_datetime(continuous_dates['Date'])

# 合并并填充缺失值
data_close_continuous = continuous_dates.merge(data_close, how='left', on='Date')
data_close_continuous = data_close_continuous.fillna(method='ffill')

data_all_features_continuous = continuous_dates.merge(data_all_features, how='left', on='Date')
data_all_features_continuous = data_all_features_continuous.fillna(method='ffill')

# 4. 创建差分特征 (1阶差分)
data_close_continuous['Close_Diff'] = data_close_continuous['Close'].diff()
data_all_features_continuous['Close_Diff'] = data_all_features_continuous['Close'].diff()
data_all_features_continuous['Adj_Close_Diff'] = data_all_features_continuous['Adj Close'].diff()

# 删除含有NaN的行 (第一行会因为差分操作产生NaN)
data_close_continuous = data_close_continuous.dropna()
data_all_features_continuous = data_all_features_continuous.dropna()

# 5. 可视化原始收盘价数据
fig = px.line(data_close_continuous, x='Date', y='Close', title='收盘价')
fig.update_layout(xaxis_title='日期', yaxis_title='收盘价', width=900, height=500)
fig.show()

# 6. 可视化差分后的数据
fig = px.line(data_close_continuous, x='Date', y='Close_Diff', title='收盘价一阶差分')
fig.update_layout(xaxis_title='日期', yaxis_title='收盘价差分', width=900, height=500)
fig.show()

# 7. 创建Darts时间序列对象
# 单特征 - 收盘价
series_close = TimeSeries.from_dataframe(data_close_continuous, time_col='Date', value_cols='Close')
# 单特征 - 收盘价差分
series_close_diff = TimeSeries.from_dataframe(data_close_continuous, time_col='Date', value_cols='Close_Diff')
# 多特征
series_multi = TimeSeries.from_dataframe(
    data_all_features_continuous,
    time_col='Date',
    value_cols=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
)

# 8. 数据标准化
scaler_close = Scaler()
series_close_scaled = scaler_close.fit_transform(series_close)

scaler_close_diff = Scaler()
series_close_diff_scaled = scaler_close_diff.fit_transform(series_close_diff)

scaler_multi = Scaler()
series_multi_scaled = scaler_multi.fit_transform(series_multi)

# 9. 查看标准化后的数据
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=series_close_scaled.time_index,
    y=series_close_scaled.values().flatten(),
    mode='lines',
    name='标准化后的收盘价'
))
fig.update_layout(
    title='标准化后的收盘价',
    xaxis_title='日期',
    yaxis_title='标准化值',
    width=900,
    height=500
)
fig.show()

原始数据形状: (2912, 7)
原始数据前5行:
         Date         Open         High          Low        Close  \
0  2011-07-01  2767.833008  2778.667969  2752.966064  2759.362061   
1  2011-07-04  2770.939941  2813.270020  2770.939941  2812.818115   
2  2011-07-05  2812.721924  2818.141113  2799.110107  2816.354004   
3  2011-07-06  2811.814941  2811.814941  2780.729004  2810.479004   
4  2011-07-07  2813.193115  2825.123047  2793.892090  2794.267090   

     Adj Close  Volume  
0  2759.362061   92000  
1  2812.818115  122000  
2  2816.354004  110200  
3  2810.479004  103200  
4  2794.267090  116600  


In [5]:
# 方案1: 使用验证集 - 70% 训练, 15% 验证, 15% 测试
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# 计算分割点
train_size = int(len(series_close_scaled) * train_ratio)
val_size = int(len(series_close_scaled) * val_ratio)

# 单特征收盘价数据划分
train_close, val_close_test = series_close_scaled.split_before(train_size)
val_close, test_close = val_close_test.split_before(val_size)

# 单特征收盘价差分数据划分
train_close_diff, val_close_diff_test = series_close_diff_scaled.split_before(train_size)
val_close_diff, test_close_diff = val_close_diff_test.split_before(val_size)

# 多特征数据划分
train_multi, val_multi_test = series_multi_scaled.split_before(train_size)
val_multi, test_multi = val_multi_test.split_before(val_size)

# 可视化数据集划分 (收盘价)
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=train_close.time_index,
    y=train_close.values().flatten(),
    mode='lines',
    name='训练集'
))
fig.add_trace(go.Scatter(
    x=val_close.time_index,
    y=val_close.values().flatten(),
    mode='lines',
    name='验证集'
))
fig.add_trace(go.Scatter(
    x=test_close.time_index,
    y=test_close.values().flatten(),
    mode='lines',
    name='测试集'
))
fig.update_layout(
    title='数据集划分 - 收盘价',
    xaxis_title='日期',
    yaxis_title='标准化收盘价',
    width=900,
    height=500
)
fig.show()

# 打印数据集大小
print(f"\n使用验证集:")
print(f"训练集大小: {len(train_close)} ({train_close.start_time()} 至 {train_close.end_time()})")
print(f"验证集大小: {len(val_close)} ({val_close.start_time()} 至 {val_close.end_time()})")
print(f"测试集大小: {len(test_close)} ({test_close.start_time()} 至 {test_close.end_time()})")

# 方案2: 不使用验证集 - 85% 训练, 15% 测试
train_ratio2 = 0.85
test_ratio2 = 0.15

# 计算分割点
train_size2 = int(len(series_close_scaled) * train_ratio2)

# 单特征收盘价数据划分
train_close2, test_close2 = series_close_scaled.split_before(train_size2)

# 单特征收盘价差分数据划分
train_close_diff2, test_close_diff2 = series_close_diff_scaled.split_before(train_size2)

# 多特征数据划分
train_multi2, test_multi2 = series_multi_scaled.split_before(train_size2)

# 打印数据集大小
print(f"\n不使用验证集:")
print(f"训练集大小: {len(train_close2)} ({train_close2.start_time()} 至 {train_close2.end_time()})")
print(f"测试集大小: {len(test_close2)} ({test_close2.start_time()} 至 {test_close2.end_time()})")


使用验证集:
训练集大小: 526 (2021-06-08 00:00:00 至 2022-11-15 00:00:00)
验证集大小: 112 (2022-11-16 00:00:00 至 2023-03-07 00:00:00)
测试集大小: 114 (2023-03-08 00:00:00 至 2023-06-29 00:00:00)

不使用验证集:
训练集大小: 639 (2021-06-08 00:00:00 至 2023-03-08 00:00:00)
测试集大小: 113 (2023-03-09 00:00:00 至 2023-06-29 00:00:00)


In [6]:
import warnings
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, date, timedelta
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse, mae, mse
from darts.models import NBEATSModel

warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)

# 1. 数据准备和预处理
# 读取股票数据
data0 = pd.read_csv("000001.SS.csv")
print(f"原始数据形状: {data0.shape}")
print(f"原始数据前5行:\n{data0.head()}")

# 将日期列转换为日期类型
data0['Date'] = pd.to_datetime(data0['Date'])

# 只使用最近的501个交易日数据
data_original = data0[-501:].reset_index(drop=True)
print(f"使用的数据范围: {data_original['Date'].min()} 至 {data_original['Date'].max()}")

# 创建连续日期序列
start_date = data_original['Date'].min().date()
end_date = data_original['Date'].max().date()

# 确保日期类型统一 - 将所有日期转换为datetime类型
continuous_dates = pd.DataFrame({
    'Date': pd.date_range(start=start_date, end=end_date, freq='D')
})

# 创建一个标记是否为交易日的列
# 首先，将所有日期设为非交易日(0)
continuous_dates['is_trading_day'] = 0

# 然后，在原始数据存在的日期上标记为交易日(1)
trading_dates_set = set(data_original['Date'].dt.date)
continuous_dates['is_trading_day'] = continuous_dates['Date'].dt.date.apply(
    lambda x: 1 if x in trading_dates_set else 0
)

# 合并数据，保留所有连续日期
data_all = pd.merge(continuous_dates, data_original, left_on='Date', how='left', right_on='Date')

# 使用前向填充处理缺失的价格和成交量数据
price_volume_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
data_all[price_volume_cols] = data_all[price_volume_cols].fillna(method='ffill')

# 查看数据
print("合并后的数据形状:", data_all.shape)
print("合并后的数据前5行:")
print(data_all.head())

# 统计交易日和非交易日数量
trading_days = data_all['is_trading_day'].sum()
non_trading_days = len(data_all) - trading_days
print(f"交易日数量: {trading_days}, 非交易日数量: {non_trading_days}")

# 创建差分特征
data_all['Close_Diff'] = data_all['Close'].diff()
data_all['Volume_Diff'] = data_all['Volume'].diff()

# 替换第一行的NaN
data_all['Close_Diff'].fillna(0, inplace=True)
data_all['Volume_Diff'].fillna(0, inplace=True)

# 可视化数据，标记交易日和非交易日
fig = go.Figure()
# 所有数据
fig.add_trace(go.Scatter(
    x=data_all['Date'],
    y=data_all['Close'],
    mode='lines',
    name='所有日期收盘价',
    line=dict(color='lightgrey')
))
# 只有交易日
trading_data = data_all[data_all['is_trading_day'] == 1]
fig.add_trace(go.Scatter(
    x=trading_data['Date'],
    y=trading_data['Close'],
    mode='markers',
    name='交易日收盘价',
    marker=dict(color='blue', size=6)
))
fig.update_layout(
    title='收盘价 (标记交易日)',
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500
)
fig.show()

# 2. 创建时间序列对象
# 创建多特征时间序列和协变量
# 主要特征: Open, High, Low, Close, Adj Close, Volume, Close_Diff, Volume_Diff
# 协变量: is_trading_day
# 目标是预测Close

# 创建特征时间序列
feature_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Close_Diff', 'Volume_Diff']
target_series = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=feature_cols
)

# 创建单独的收盘价时间序列（用于创建专用的缩放器）
close_series = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=['Close']
)

# 创建协变量时间序列 (交易日标志)
covariates = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=['is_trading_day']
)

# 标准化特征数据
scaler_features = Scaler()
scaled_features = scaler_features.fit_transform(target_series)

# 为收盘价创建单独的缩放器
scaler_close = Scaler()
scaled_close = scaler_close.fit_transform(close_series)

# 无需标准化协变量 (二值特征)
print("主要特征形状:", scaled_features.shape)
print("收盘价形状:", scaled_close.shape)
print("协变量形状:", covariates.shape)

# 3. 数据集划分 - 使用70% 训练, 15% 验证, 15% 测试
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# 计算分割点
train_size = int(len(scaled_features) * train_ratio)
val_size = int(len(scaled_features) * val_ratio)

# 特征数据划分
train_features, val_test_features = scaled_features.split_before(train_size)
val_features, test_features = val_test_features.split_before(val_size)

# 收盘价数据划分
train_close, val_test_close = scaled_close.split_before(train_size)
val_close, test_close = val_test_close.split_before(val_size)

# 协变量数据划分
train_cov, val_test_cov = covariates.split_before(train_size)
val_cov, test_cov = val_test_cov.split_before(val_size)

# 同样划分原始目标序列（用于评估）
train_data, val_test_data = target_series.split_before(train_size)
val_data, test_data = val_test_data.split_before(val_size)

# 为了可视化，获取Close列的索引
close_idx = feature_cols.index('Close')

# 提取多特征中的Close列用于可视化
train_close_from_features = train_features.univariate_component(close_idx)
val_close_from_features = val_features.univariate_component(close_idx)
test_close_from_features = test_features.univariate_component(close_idx)

# 可视化数据集划分
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=train_close.time_index,
    y=train_close.values().flatten(),
    mode='lines',
    name='训练集'
))
fig.add_trace(go.Scatter(
    x=val_close.time_index,
    y=val_close.values().flatten(),
    mode='lines',
    name='验证集'
))
fig.add_trace(go.Scatter(
    x=test_close.time_index,
    y=test_close.values().flatten(),
    mode='lines',
    name='测试集'
))
fig.update_layout(
    title='数据集划分 - 收盘价 (标准化)',
    xaxis_title='日期',
    yaxis_title='标准化收盘价',
    width=900,
    height=500
)
fig.show()

# 打印数据集大小
print(f"训练集大小: {len(train_features)} ({train_features.start_time()} 至 {train_features.end_time()})")
print(f"验证集大小: {len(val_features)} ({val_features.start_time()} 至 {val_features.end_time()})")
print(f"测试集大小: {len(test_features)} ({test_features.start_time()} 至 {test_features.end_time()})")

# 4. 创建并训练模型
# NBEATS模型
input_chunk_length = 20  # 使用过去20天数据
output_chunk_length = 1  # 预测未来1天

model = NBEATSModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=output_chunk_length,
    n_epochs=100,
    nr_epochs_val_period=10,
    batch_size=32,
    generic_architecture=True,
    num_stacks=10,
    num_blocks=1,
    num_layers=4,
    layer_widths=256,
    dropout=0.1,
    model_name="nbeats_multi_features_with_cov",
    random_state=42,
    optimizer_kwargs={"lr": 1e-4}
)

# 训练模型 - 使用过去协变量
print("开始训练多特征模型（带交易日协变量）...")
model.fit(
    series=train_features,
    val_series=val_features,
    past_covariates=covariates,
    val_past_covariates=val_cov,
    verbose=True
)


## 5. 模型预测和评估
## 使用滚动预测方式评估测试集
#print("\n执行测试集滚动预测...")
#
## 初始化，从训练和验证数据开始
#historical_data = train_features.append(val_features)
#test_pred_values = []
#test_dates = test_features.time_index
#
## 逐步预测
#for i in range(len(test_features)):
#    # 向前预测一步
#    next_pred = model.predict(
#        n=1,
#        series=historical_data,
#        past_covariates=covariates  # 使用整个协变量序列
#    )
#
#    # 保存预测结果
#    test_pred_values.append(next_pred.values()[0])
#
#    # 更新历史数据，添加真实的测试数据点
#    # 使用slice方法获取单个时间点的数据
#    current_time = test_dates[i]
#    next_actual = test_features.slice(current_time, current_time)
#    historical_data = historical_data.append(next_actual)
#
## 创建预测时间序列对象
#test_pred = TimeSeries.from_times_and_values(
#    times=pd.DatetimeIndex(test_dates),  # 确保使用DatetimeIndex
#    values=np.array(test_pred_values)
#)



# 5. 模型预测和评估
# 预测测试集
print("\n执行测试集预测...")
test_pred = model.predict(
    n=len(test_features),
    series=train_features.append(val_features),
    past_covariates=covariates
)


# 获取测试集的真实值和预测值 (Close列)
test_actual_close_from_features = test_features.univariate_component(close_idx)
test_pred_close_from_features = test_pred.univariate_component(close_idx)


# 创建一个权重数组，根据是否为交易日设置不同权重
weights = []

# 获取测试集对应的交易日标志
test_period_start = test_close.start_time()
test_period_end = test_close.end_time()
test_covariates = covariates.slice(test_period_start, test_period_end)

for i, value in enumerate(test_covariates.values().flatten()):
    # 交易日权重为1，非交易日权重为0.2
    weight = 1.0 if value == 1 else 0.2
    weights.append(weight)

weights = np.array(weights)

# 计算加权性能指标
def weighted_mape(actual, pred, weights):
    abs_percentage_error = np.abs((actual.values().flatten() - pred.values().flatten()) / actual.values().flatten())
    return np.average(abs_percentage_error, weights=weights) * 100

def weighted_mae(actual, pred, weights):
    abs_error = np.abs(actual.values().flatten() - pred.values().flatten())
    return np.average(abs_error, weights=weights)

def weighted_rmse(actual, pred, weights):
    squared_error = (actual.values().flatten() - pred.values().flatten()) ** 2
    weighted_mse = np.average(squared_error, weights=weights)
    return np.sqrt(weighted_mse)

# 计算加权指标
w_mae = weighted_mae(test_actual_close_from_features, test_pred_close_from_features, weights)
w_rmse = weighted_rmse(test_actual_close_from_features, test_pred_close_from_features, weights)
w_mape = weighted_mape(test_actual_close_from_features, test_pred_close_from_features, weights)

# 计算标准指标（不加权）
standard_mae = mae(test_actual_close_from_features, test_pred_close_from_features)
standard_rmse = rmse(test_actual_close_from_features, test_pred_close_from_features)
standard_mape = mape(test_actual_close_from_features, test_pred_close_from_features)

# 反标准化以获取原始尺度的预测 - 使用专门的收盘价缩放器
# 先将多特征提取的Close列转换为单独的时间序列
test_actual_close_ts = TimeSeries.from_times_and_values(
    times=test_actual_close_from_features.time_index,
    values=test_actual_close_from_features.values()
)

test_pred_close_ts = TimeSeries.from_times_and_values(
    times=test_pred_close_from_features.time_index,
    values=test_pred_close_from_features.values()
)

# 然后使用收盘价专用的缩放器进行反转
test_actual_close_original = scaler_close.inverse_transform(test_actual_close_ts)
test_pred_close_original = scaler_close.inverse_transform(test_pred_close_ts)

# 提取交易日和非交易日的数据
test_dates = test_actual_close_original.time_index
trading_indices = np.where(test_covariates.values().flatten() == 1)[0]
non_trading_indices = np.where(test_covariates.values().flatten() == 0)[0]

print(f"测试集中交易日数量: {len(trading_indices)}, 非交易日数量: {len(non_trading_indices)}")

# 分别计算交易日和非交易日的指标
if len(trading_indices) > 0:
    trading_actual = test_actual_close_original.values().flatten()[trading_indices]
    trading_pred = test_pred_close_original.values().flatten()[trading_indices]
    trading_mape = np.mean(np.abs((trading_actual - trading_pred) / trading_actual)) * 100
    trading_mae = np.mean(np.abs(trading_actual - trading_pred))
    trading_rmse = np.sqrt(np.mean((trading_actual - trading_pred) ** 2))
else:
    trading_mape = trading_mae = trading_rmse = np.nan

if len(non_trading_indices) > 0:
    non_trading_actual = test_actual_close_original.values().flatten()[non_trading_indices]
    non_trading_pred = test_pred_close_original.values().flatten()[non_trading_indices]
    non_trading_mape = np.mean(np.abs((non_trading_actual - non_trading_pred) / non_trading_actual)) * 100
    non_trading_mae = np.mean(np.abs(non_trading_actual - non_trading_pred))
    non_trading_rmse = np.sqrt(np.mean((non_trading_actual - non_trading_pred) ** 2))
else:
    non_trading_mape = non_trading_mae = non_trading_rmse = np.nan

# 可视化测试结果，区分交易日和非交易日
fig = go.Figure()

# 添加真实值
fig.add_trace(go.Scatter(
    x=test_actual_close_original.time_index,
    y=test_actual_close_original.values().flatten(),
    mode='lines',
    name='实际值',
    line=dict(color='black')
))

# 添加预测值
fig.add_trace(go.Scatter(
    x=test_pred_close_original.time_index,
    y=test_pred_close_original.values().flatten(),
    mode='lines',
    name='预测值',
    line=dict(dash='dash', color='blue')
))

# 标记交易日
if len(trading_indices) > 0:
    trading_dates = [test_dates[i] for i in trading_indices]
    fig.add_trace(go.Scatter(
        x=trading_dates,
        y=test_actual_close_original.values().flatten()[trading_indices],
        mode='markers',
        name='交易日',
        marker=dict(color='green', size=8, symbol='circle')
    ))

fig.update_layout(
    title=f'测试集预测结果 (加权MAPE: {w_mape:.2f}%, 标准MAPE: {standard_mape:.2f}%)',
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500,
    legend=dict(x=0.01, y=0.99, orientation='h')
)
fig.show()

# 打印性能指标
print("\n预测性能评估:")
print(f"{'指标':<20} {'加权值':<15} {'标准值':<15}")
print(f"{'-'*50}")
print(f"{'MAE':<20} {w_mae:<15.4f} {standard_mae:<15.4f}")
print(f"{'RMSE':<20} {w_rmse:<15.4f} {standard_rmse:<15.4f}")
print(f"{'MAPE(%)':<20} {w_mape:<15.4f} {standard_mape:<15.4f}")

print("\n交易日与非交易日性能比较:")
print(f"{'指标':<20} {'交易日':<15} {'非交易日':<15}")
print(f"{'-'*50}")
print(f"{'MAE':<20} {trading_mae:<15.4f} {non_trading_mae:<15.4f}")
print(f"{'RMSE':<20} {trading_rmse:<15.4f} {non_trading_rmse:<15.4f}")
print(f"{'MAPE(%)':<20} {trading_mape:<15.4f} {non_trading_mape:<15.4f}")

# 6. 预测未来
# 使用全部数据重新训练一个模型用于未来预测
final_model = NBEATSModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=output_chunk_length,
    n_epochs=150,  # 增加轮数
    batch_size=32,
    generic_architecture=True,
    num_stacks=10,
    num_blocks=1,
    num_layers=4,
    layer_widths=256,
    dropout=0.1,
    model_name="nbeats_final_with_cov",
    random_state=42,
    optimizer_kwargs={"lr": 1e-4}
)

# 使用全部数据训练
print("\n训练最终预测模型...")
final_model.fit(
    series=scaled_features,
    past_covariates=covariates,
    verbose=True
)

# 需要为未来预测创建交易日协变量
# 首先确定未来的日期范围
forecast_horizon = 10  # 预测未来10天
last_date = data_all['Date'].max()
future_dates = [last_date + timedelta(days=i+1) for i in range(forecast_horizon)]

# 创建未来的交易日协变量 (这里我们需要知道哪些是交易日)
# 简单方法：假设周一至周五是交易日，周六日是非交易日
future_is_trading = []
for date in future_dates:
    # 如果是周末，则不是交易日
    is_trading = 0 if date.weekday() >= 5 else 1
    future_is_trading.append(is_trading)

# 创建扩展的协变量时间序列 (包括历史和未来)
# 对于过去协变量，我们需要提供包含历史和预测期的协变量
extended_dates = pd.DatetimeIndex(list(covariates.time_index) + future_dates)
extended_is_trading = np.concatenate([covariates.values().flatten(), np.array(future_is_trading)])

extended_covariates = TimeSeries.from_times_and_values(
    times=extended_dates,
    values=extended_is_trading.reshape(-1, 1)
)

# 预测未来
print("\n预测未来收盘价...")
future_pred_scaled = final_model.predict(
    n=forecast_horizon,
    series=scaled_features,
    past_covariates=extended_covariates
)

# 提取预测的Close列并转换为单独的时间序列
future_pred_close_from_features = future_pred_scaled.univariate_component(close_idx)
future_pred_close_ts = TimeSeries.from_times_and_values(
    times=future_pred_close_from_features.time_index,
    values=future_pred_close_from_features.values()
)

# 使用收盘价专用的缩放器反转
future_pred_close_original = scaler_close.inverse_transform(future_pred_close_ts)

# 可视化未来预测
fig = go.Figure()

# 显示历史数据（最后30个点）
historical_close = close_series  # 原始的收盘价数据
fig.add_trace(go.Scatter(
    x=historical_close.time_index[-30:],
    y=historical_close.values()[-30:].flatten(),
    mode='lines',
    name='历史数据',
    line=dict(color='black')
))

# 添加预测 - 区分交易日和非交易日
all_future_dates = future_pred_close_original.time_index
all_future_values = future_pred_close_original.values().flatten()

trading_dates = []
trading_values = []
non_trading_dates = []
non_trading_values = []

for i, date in enumerate(all_future_dates):
    if future_is_trading[i] == 1:
        trading_dates.append(date)
        trading_values.append(all_future_values[i])
    else:
        non_trading_dates.append(date)
        non_trading_values.append(all_future_values[i])

# 所有预测
fig.add_trace(go.Scatter(
    x=all_future_dates,
    y=all_future_values,
    mode='lines',
    name='未来预测',
    line=dict(dash='dash', color='blue')
))

# 交易日预测
if trading_dates:
    fig.add_trace(go.Scatter(
        x=trading_dates,
        y=trading_values,
        mode='markers',
        name='交易日预测',
        marker=dict(color='green', size=10, symbol='circle')
    ))

# 非交易日预测
if non_trading_dates:
    fig.add_trace(go.Scatter(
        x=non_trading_dates,
        y=non_trading_values,
        mode='markers',
        name='非交易日预测',
        marker=dict(color='red', size=8, symbol='x')
    ))

# 添加垂直线表示分界点
fig.add_vline(x=last_date, line_width=1, line_dash="dash", line_color="gray")
fig.add_annotation(x=last_date, y=historical_close.values()[-1][0],
            text="当前", showarrow=True, arrowhead=1, ax=-50, ay=-30)

fig.update_layout(
    title='未来10天收盘价预测 (区分交易日/非交易日)',
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500,
    legend=dict(x=0.01, y=0.99, orientation='h')
)
fig.show()

# 打印预测值，区分交易日和非交易日
print("\n未来10天的预测收盘价:")
for i, (date, value, is_trading) in enumerate(zip(all_future_dates, all_future_values, future_is_trading)):
    trading_status = "交易日" if is_trading == 1 else "非交易日"
    print(f"日期 {i+1} ({date.date()}, {trading_status}): {value:.2f}")

# 7. 对预测结果进行误差分析
# 计算实际值和预测值之间的加权残差
std_residuals = test_actual_close_from_features.values().flatten() - test_pred_close_from_features.values().flatten()
weighted_residuals = std_residuals * weights

# 可视化残差
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=test_actual_close_from_features.time_index,
    y=std_residuals,
    mode='lines',
    name='标准残差',
    line=dict(color='lightblue')
))
fig.add_trace(go.Scatter(
    x=test_actual_close_from_features.time_index,
    y=weighted_residuals,
    mode='lines',
    name='加权残差',
    line=dict(color='blue')
))
fig.add_hline(y=0, line_width=1, line_dash="dash", line_color="red")
fig.update_layout(
    title='预测残差分析',
    xaxis_title='日期',
    yaxis_title='残差',
    width=900,
    height=400
)
fig.show()

# 分别绘制交易日和非交易日的残差分布
trading_residuals = std_residuals[trading_indices]
non_trading_residuals = std_residuals[non_trading_indices]

# 创建残差分布直方图
fig = make_subplots(rows=1, cols=2, subplot_titles=("交易日残差分布", "非交易日残差分布"))

fig.add_trace(
    go.Histogram(
        x=trading_residuals,
        nbinsx=20,
        name='交易日',
        marker_color='green'
    ),
    row=1, col=1
)

fig.add_trace(
    go.Histogram(
        x=non_trading_residuals,
        nbinsx=20,
        name='非交易日',
        marker_color='red'
    ),
    row=1, col=2
)

fig.update_layout(
    title_text='残差分布比较',
    height=400,
    width=900,
    showlegend=False
)

fig.show()

# 计算残差统计量
trading_residual_mean = np.mean(trading_residuals)
trading_residual_std = np.std(trading_residuals)
non_trading_residual_mean = np.mean(non_trading_residuals)
non_trading_residual_std = np.std(non_trading_residuals)

print("\n残差统计分析:")
print(f"{'统计量':<20} {'交易日':<15} {'非交易日':<15}")
print(f"{'-'*50}")
print(f"{'平均残差':<20} {trading_residual_mean:<15.4f} {non_trading_residual_mean:<15.4f}")
print(f"{'残差标准差':<20} {trading_residual_std:<15.4f} {non_trading_residual_std:<15.4f}")
print(f"{'样本数量':<20} {len(trading_residuals):<15d} {len(non_trading_residuals):<15d}")

原始数据形状: (2912, 7)
原始数据前5行:
         Date         Open         High          Low        Close  \
0  2011-07-01  2767.833008  2778.667969  2752.966064  2759.362061   
1  2011-07-04  2770.939941  2813.270020  2770.939941  2812.818115   
2  2011-07-05  2812.721924  2818.141113  2799.110107  2816.354004   
3  2011-07-06  2811.814941  2811.814941  2780.729004  2810.479004   
4  2011-07-07  2813.193115  2825.123047  2793.892090  2794.267090   

     Adj Close  Volume  
0  2759.362061   92000  
1  2812.818115  122000  
2  2816.354004  110200  
3  2810.479004  103200  
4  2794.267090  116600  
使用的数据范围: 2021-06-07 00:00:00 至 2023-06-29 00:00:00
合并后的数据形状: (753, 8)
合并后的数据前5行:
        Date  is_trading_day         Open         High          Low  \
0 2021-06-07               1  3597.139893  3600.379883  3581.899902   
1 2021-06-08               1  3598.750000  3621.520020  3563.250000   
2 2021-06-09               1  3576.800049  3598.709961  3572.639893   
3 2021-06-10               1  3587.530029  

主要特征形状: (753, 8, 1)
收盘价形状: (753, 1, 1)
协变量形状: (753, 1, 1)


训练集大小: 527 (2021-06-07 00:00:00 至 2022-11-15 00:00:00)
验证集大小: 112 (2022-11-16 00:00:00 至 2023-03-07 00:00:00)
测试集大小: 114 (2023-03-08 00:00:00 至 2023-06-29 00:00:00)
开始训练多特征模型（带交易日协变量）...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


执行测试集预测...


Predicting: |          | 0/? [00:00<?, ?it/s]

测试集中交易日数量: 76, 非交易日数量: 38



预测性能评估:
指标                   加权值             标准值            
--------------------------------------------------
MAE                  0.3344          0.3368         
RMSE                 0.3596          0.3608         
MAPE(%)              71.2925         71.5880        

交易日与非交易日性能比较:
指标                   交易日             非交易日           
--------------------------------------------------
MAE                  276.4846        284.6416       
RMSE                 297.6859        301.7336       
MAPE(%)              8.4240          8.6664         

训练最终预测模型...


Training: |          | 0/? [00:00<?, ?it/s]


预测未来收盘价...


Predicting: |          | 0/? [00:00<?, ?it/s]


未来10天的预测收盘价:
日期 1 (2023-06-30, 交易日): 3165.54
日期 2 (2023-07-01, 非交易日): 3153.02
日期 3 (2023-07-02, 非交易日): 3146.99
日期 4 (2023-07-03, 交易日): 3103.10
日期 5 (2023-07-04, 交易日): 3081.58
日期 6 (2023-07-05, 交易日): 3073.63
日期 7 (2023-07-06, 交易日): 3061.31
日期 8 (2023-07-07, 交易日): 3061.55
日期 9 (2023-07-08, 非交易日): 3024.89
日期 10 (2023-07-09, 非交易日): 3017.07



残差统计分析:
统计量                  交易日             非交易日           
--------------------------------------------------
平均残差                 0.3329          0.3423         
残差标准差                0.1347          0.1237         
样本数量                 76              38             


In [7]:
import warnings
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, date, timedelta
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse, mae, mse
from darts.models import NBEATSModel

warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)

# 1. 数据准备和预处理
# 读取股票数据
data0 = pd.read_csv("000001.SS.csv")
print(f"原始数据形状: {data0.shape}")
print(f"原始数据前5行:\n{data0.head()}")

# 将日期列转换为日期类型
data0['Date'] = pd.to_datetime(data0['Date'])

# 只使用最近的501个交易日数据
data_original = data0[-501:].reset_index(drop=True)
print(f"使用的数据范围: {data_original['Date'].min()} 至 {data_original['Date'].max()}")

# 创建连续日期序列
start_date = data_original['Date'].min().date()
end_date = data_original['Date'].max().date()

# 确保日期类型统一 - 将所有日期转换为datetime类型
continuous_dates = pd.DataFrame({
    'Date': pd.date_range(start=start_date, end=end_date, freq='D')
})

# 创建一个标记是否为交易日的列
# 首先，将所有日期设为非交易日(0)
continuous_dates['is_trading_day'] = 0

# 然后，在原始数据存在的日期上标记为交易日(1)
trading_dates_set = set(data_original['Date'].dt.date)
continuous_dates['is_trading_day'] = continuous_dates['Date'].dt.date.apply(
    lambda x: 1 if x in trading_dates_set else 0
)

# 合并数据，保留所有连续日期
data_all = pd.merge(continuous_dates, data_original, left_on='Date', how='left', right_on='Date')

# 使用前向填充处理缺失的价格和成交量数据
price_volume_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
data_all[price_volume_cols] = data_all[price_volume_cols].fillna(method='ffill')

# 查看数据
print("合并后的数据形状:", data_all.shape)
print("合并后的数据前5行:")
print(data_all.head())

# 统计交易日和非交易日数量
trading_days = data_all['is_trading_day'].sum()
non_trading_days = len(data_all) - trading_days
print(f"交易日数量: {trading_days}, 非交易日数量: {non_trading_days}")

# 创建差分特征
data_all['Close_Diff'] = data_all['Close'].diff()
data_all['Volume_Diff'] = data_all['Volume'].diff()

# 替换第一行的NaN
data_all['Close_Diff'].fillna(0, inplace=True)
data_all['Volume_Diff'].fillna(0, inplace=True)

# 可视化数据，标记交易日和非交易日
fig = go.Figure()
# 所有数据
fig.add_trace(go.Scatter(
    x=data_all['Date'],
    y=data_all['Close'],
    mode='lines',
    name='所有日期收盘价',
    line=dict(color='lightgrey')
))
# 只有交易日
trading_data = data_all[data_all['is_trading_day'] == 1]
fig.add_trace(go.Scatter(
    x=trading_data['Date'],
    y=trading_data['Close'],
    mode='markers',
    name='交易日收盘价',
    marker=dict(color='blue', size=6)
))
fig.update_layout(
    title='收盘价 (标记交易日)',
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500
)
fig.show()

# 2. 创建时间序列对象
# 创建多特征时间序列和协变量
# 主要特征: Open, High, Low, Close, Adj Close, Volume, Close_Diff, Volume_Diff
# 协变量: is_trading_day
# 目标是预测Close

# 创建特征时间序列
feature_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Close_Diff', 'Volume_Diff']
target_series = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=feature_cols
)

# 创建单独的收盘价时间序列（用于创建专用的缩放器）
close_series = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=['Close']
)

# 创建协变量时间序列 (交易日标志)
covariates = TimeSeries.from_dataframe(
    data_all,
    time_col='Date',
    value_cols=['is_trading_day']
)

# 标准化特征数据
scaler_features = Scaler()
scaled_features = scaler_features.fit_transform(target_series)

# 为收盘价创建单独的缩放器
scaler_close = Scaler()
scaled_close = scaler_close.fit_transform(close_series)

# 无需标准化协变量 (二值特征)
print("主要特征形状:", scaled_features.shape)
print("收盘价形状:", scaled_close.shape)
print("协变量形状:", covariates.shape)

# 3. 数据集划分 - 使用70% 训练, 15% 验证, 15% 测试
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# 计算分割点
train_size = int(len(scaled_features) * train_ratio)
val_size = int(len(scaled_features) * val_ratio)

# 特征数据划分
train_features, val_test_features = scaled_features.split_before(train_size)
val_features, test_features = val_test_features.split_before(val_size)

# 收盘价数据划分
train_close, val_test_close = scaled_close.split_before(train_size)
val_close, test_close = val_test_close.split_before(val_size)

# 协变量数据划分
train_cov, val_test_cov = covariates.split_before(train_size)
val_cov, test_cov = val_test_cov.split_before(val_size)

# 同样划分原始目标序列（用于评估）
train_data, val_test_data = target_series.split_before(train_size)
val_data, test_data = val_test_data.split_before(val_size)

# 为了可视化，获取Close列的索引
close_idx = feature_cols.index('Close')

# 提取多特征中的Close列用于可视化
train_close_from_features = train_features.univariate_component(close_idx)
val_close_from_features = val_features.univariate_component(close_idx)
test_close_from_features = test_features.univariate_component(close_idx)

# 可视化数据集划分
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=train_close.time_index,
    y=train_close.values().flatten(),
    mode='lines',
    name='训练集'
))
fig.add_trace(go.Scatter(
    x=val_close.time_index,
    y=val_close.values().flatten(),
    mode='lines',
    name='验证集'
))
fig.add_trace(go.Scatter(
    x=test_close.time_index,
    y=test_close.values().flatten(),
    mode='lines',
    name='测试集'
))
fig.update_layout(
    title='数据集划分 - 收盘价 (标准化)',
    xaxis_title='日期',
    yaxis_title='标准化收盘价',
    width=900,
    height=500
)
fig.show()

# 打印数据集大小
print(f"训练集大小: {len(train_features)} ({train_features.start_time()} 至 {train_features.end_time()})")
print(f"验证集大小: {len(val_features)} ({val_features.start_time()} 至 {val_features.end_time()})")
print(f"测试集大小: {len(test_features)} ({test_features.start_time()} 至 {test_features.end_time()})")

# 4. 创建并训练模型
# NBEATS模型
input_chunk_length = 20  # 使用过去20天数据
output_chunk_length = 1  # 预测未来1天

model = NBEATSModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=output_chunk_length,
    n_epochs=100,
    nr_epochs_val_period=10,
    batch_size=32,
    generic_architecture=True,
    num_stacks=10,
    num_blocks=1,
    num_layers=4,
    layer_widths=256,
    dropout=0.1,
    model_name="nbeats_multi_features_with_cov",
    random_state=42,
    optimizer_kwargs={"lr": 1e-4}
)

# 训练模型 - 使用过去协变量
print("开始训练多特征模型（带交易日协变量）...")
model.fit(
    series=train_features,
    val_series=val_features,
    past_covariates=covariates,
    val_past_covariates=val_cov,
    verbose=True
)



# 5. 模型预测和评估
# 使用滚动预测方式评估测试集
print("\n执行测试集滚动预测...")

# 初始化，从训练和验证数据开始
historical_data = train_features.append(val_features)
test_pred_values = []
test_dates = test_features.time_index

# 逐步预测
for i in range(len(test_features)):
    # 向前预测一步
    next_pred = model.predict(
        n=1,
        series=historical_data,
        past_covariates=covariates  # 使用整个协变量序列
    )

    # 保存预测结果
    test_pred_values.append(next_pred.values()[0])

    # 更新历史数据，添加真实的测试数据点
    # 使用slice方法获取单个时间点的数据
    current_time = test_dates[i]
    next_actual = test_features.slice(current_time, current_time)
    historical_data = historical_data.append(next_actual)

# 创建预测时间序列对象
test_pred = TimeSeries.from_times_and_values(
    times=pd.DatetimeIndex(test_dates),  # 确保使用DatetimeIndex
    values=np.array(test_pred_values)
)

# 获取测试集的真实值和预测值 (Close列)
test_actual_close_from_features = test_features.univariate_component(close_idx)
test_pred_close_from_features = test_pred.univariate_component(close_idx)

# 创建一个权重数组，根据是否为交易日设置不同权重
weights = []

# 获取测试集对应的交易日标志
test_period_start = test_close.start_time()
test_period_end = test_close.end_time()
test_covariates = covariates.slice(test_period_start, test_period_end)

for i, value in enumerate(test_covariates.values().flatten()):
    # 交易日权重为1，非交易日权重为0.2
    weight = 1.0 if value == 1 else 0.2
    weights.append(weight)

weights = np.array(weights)

# 计算加权性能指标
def weighted_mape(actual, pred, weights):
    abs_percentage_error = np.abs((actual.values().flatten() - pred.values().flatten()) / actual.values().flatten())
    return np.average(abs_percentage_error, weights=weights) * 100

def weighted_mae(actual, pred, weights):
    abs_error = np.abs(actual.values().flatten() - pred.values().flatten())
    return np.average(abs_error, weights=weights)

def weighted_rmse(actual, pred, weights):
    squared_error = (actual.values().flatten() - pred.values().flatten()) ** 2
    weighted_mse = np.average(squared_error, weights=weights)
    return np.sqrt(weighted_mse)

# 计算加权指标
w_mae = weighted_mae(test_actual_close_from_features, test_pred_close_from_features, weights)
w_rmse = weighted_rmse(test_actual_close_from_features, test_pred_close_from_features, weights)
w_mape = weighted_mape(test_actual_close_from_features, test_pred_close_from_features, weights)

# 计算标准指标（不加权）
standard_mae = mae(test_actual_close_from_features, test_pred_close_from_features)
standard_rmse = rmse(test_actual_close_from_features, test_pred_close_from_features)
standard_mape = mape(test_actual_close_from_features, test_pred_close_from_features)

# 反标准化以获取原始尺度的预测 - 使用专门的收盘价缩放器
# 先将多特征提取的Close列转换为单独的时间序列
test_actual_close_ts = TimeSeries.from_times_and_values(
    times=test_actual_close_from_features.time_index,
    values=test_actual_close_from_features.values()
)

test_pred_close_ts = TimeSeries.from_times_and_values(
    times=test_pred_close_from_features.time_index,
    values=test_pred_close_from_features.values()
)

# 然后使用收盘价专用的缩放器进行反转
test_actual_close_original = scaler_close.inverse_transform(test_actual_close_ts)
test_pred_close_original = scaler_close.inverse_transform(test_pred_close_ts)

# 提取交易日和非交易日的数据
test_dates = test_actual_close_original.time_index
trading_indices = np.where(test_covariates.values().flatten() == 1)[0]
non_trading_indices = np.where(test_covariates.values().flatten() == 0)[0]

print(f"测试集中交易日数量: {len(trading_indices)}, 非交易日数量: {len(non_trading_indices)}")

# 分别计算交易日和非交易日的指标
if len(trading_indices) > 0:
    trading_actual = test_actual_close_original.values().flatten()[trading_indices]
    trading_pred = test_pred_close_original.values().flatten()[trading_indices]
    trading_mape = np.mean(np.abs((trading_actual - trading_pred) / trading_actual)) * 100
    trading_mae = np.mean(np.abs(trading_actual - trading_pred))
    trading_rmse = np.sqrt(np.mean((trading_actual - trading_pred) ** 2))
else:
    trading_mape = trading_mae = trading_rmse = np.nan

if len(non_trading_indices) > 0:
    non_trading_actual = test_actual_close_original.values().flatten()[non_trading_indices]
    non_trading_pred = test_pred_close_original.values().flatten()[non_trading_indices]
    non_trading_mape = np.mean(np.abs((non_trading_actual - non_trading_pred) / non_trading_actual)) * 100
    non_trading_mae = np.mean(np.abs(non_trading_actual - non_trading_pred))
    non_trading_rmse = np.sqrt(np.mean((non_trading_actual - non_trading_pred) ** 2))
else:
    non_trading_mape = non_trading_mae = non_trading_rmse = np.nan

# ========= 计算趋势预测准确率(ACC) =========
# 仅考虑交易日
if len(trading_indices) > 1:  # 至少需要2个交易日
    # 获取交易日的日期和数据
    trading_dates = test_dates[trading_indices]
    trading_actual = test_actual_close_original.values().flatten()[trading_indices]
    trading_pred = test_pred_close_original.values().flatten()[trading_indices]

    # 计算实际趋势和预测趋势
    trend_correct = 0
    trend_total = len(trading_indices) - 1  # 总趋势数比交易日数少1

    for i in range(1, len(trading_indices)):
        # 当前交易日真实值与上一交易日真实值比较
        actual_trend = trading_actual[i] > trading_actual[i-1]

        # 当前交易日预测值与上一交易日真实值比较
        pred_trend = trading_pred[i-1] > trading_actual[i-1]

        # 趋势预测是否一致
        if actual_trend == pred_trend:
            trend_correct += 1

    # 计算趋势预测准确率
    trend_accuracy = (trend_correct / trend_total) * 100

    # 详细输出每个交易日的趋势预测
    print("\n交易日趋势预测详情:")
    print(f"{'日期':<12} {'上一交易日真实值':<16} {'当前交易日真实值':<16} {'当前交易日预测值':<16} {'实际趋势':<10} {'预测趋势':<10} {'是否匹配':<10}")
    print(f"{'-'*90}")

    for i in range(1, len(trading_indices)):
        current_date = trading_dates[i].strftime('%Y-%m-%d')
        prev_actual = trading_actual[i-1]
        curr_actual = trading_actual[i]
        curr_pred = trading_pred[i-1]

        actual_trend = curr_actual > prev_actual
        pred_trend = curr_pred > prev_actual
        match = actual_trend == pred_trend

        actual_trend_str = "上涨" if actual_trend else "下跌"
        pred_trend_str = "上涨" if pred_trend else "下跌"
        match_str = "✓" if match else "✗"

        print(f"{current_date:<12} {prev_actual:<16.2f} {curr_actual:<16.2f} {curr_pred:<16.2f} {actual_trend_str:<10} {pred_trend_str:<10} {match_str:<10}")

    print(f"\n趋势预测准确率(ACC): {trend_accuracy:.2f}%")
    print(f"正确预测趋势: {trend_correct}/{trend_total}")
else:
    print("测试集中交易日数量不足，无法计算趋势预测准确率")
    trend_accuracy = np.nan

# 可视化测试结果，区分交易日和非交易日
fig = go.Figure()

# 添加真实值
fig.add_trace(go.Scatter(
    x=test_actual_close_original.time_index,
    y=test_actual_close_original.values().flatten(),
    mode='lines',
    name='实际值',
    line=dict(color='black')
))

# 添加预测值
fig.add_trace(go.Scatter(
    x=test_pred_close_original.time_index,
    y=test_pred_close_original.values().flatten(),
    mode='lines',
    name='预测值',
    line=dict(dash='dash', color='blue')
))

# 标记交易日
if len(trading_indices) > 0:
    trading_dates = [test_dates[i] for i in trading_indices]
    fig.add_trace(go.Scatter(
        x=trading_dates,
        y=test_actual_close_original.values().flatten()[trading_indices],
        mode='markers',
        name='交易日',
        marker=dict(color='green', size=8, symbol='circle')
    ))

# 更新图表标题，加入ACC指标
title_text = f'测试集滚动预测结果 (加权MAPE: {w_mape:.2f}%, 标准MAPE: {standard_mape:.2f}%'
if not np.isnan(trend_accuracy):
    title_text += f', ACC: {trend_accuracy:.2f}%'
title_text += ')'

fig.update_layout(
    title=title_text,
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500,
    legend=dict(x=0.01, y=0.99, orientation='h')
)
fig.show()

# 打印性能指标
print("\n预测性能评估:")
print(f"{'指标':<20} {'加权值':<15} {'标准值':<15}")
print(f"{'-'*50}")
print(f"{'MAE':<20} {w_mae:<15.4f} {standard_mae:<15.4f}")
print(f"{'RMSE':<20} {w_rmse:<15.4f} {standard_rmse:<15.4f}")
print(f"{'MAPE(%)':<20} {w_mape:<15.4f} {standard_mape:<15.4f}")
if not np.isnan(trend_accuracy):
    print(f"{'ACC(%)':<20} {trend_accuracy:<15.4f} {'N/A':<15}")

print("\n交易日与非交易日性能比较:")
print(f"{'指标':<20} {'交易日':<15} {'非交易日':<15}")
print(f"{'-'*50}")
print(f"{'MAE':<20} {trading_mae:<15.4f} {non_trading_mae:<15.4f}")
print(f"{'RMSE':<20} {trading_rmse:<15.4f} {non_trading_rmse:<15.4f}")
print(f"{'MAPE(%)':<20} {trading_mape:<15.4f} {non_trading_mape:<15.4f}")


# 6. 预测未来
# 使用全部数据重新训练一个模型用于未来预测
final_model = NBEATSModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=output_chunk_length,
    n_epochs=150,  # 增加轮数
    batch_size=32,
    generic_architecture=True,
    num_stacks=10,
    num_blocks=1,
    num_layers=4,
    layer_widths=256,
    dropout=0.1,
    model_name="nbeats_final_with_cov",
    random_state=42,
    optimizer_kwargs={"lr": 1e-4}
)

# 使用全部数据训练
print("\n训练最终预测模型...")
final_model.fit(
    series=scaled_features,
    past_covariates=covariates,
    verbose=True
)

# 需要为未来预测创建交易日协变量
# 首先确定未来的日期范围
forecast_horizon = 10  # 预测未来10天
last_date = data_all['Date'].max()
future_dates = [last_date + timedelta(days=i+1) for i in range(forecast_horizon)]

# 创建未来的交易日协变量 (这里我们需要知道哪些是交易日)
# 简单方法：假设周一至周五是交易日，周六日是非交易日
future_is_trading = []
for date in future_dates:
    # 如果是周末，则不是交易日
    is_trading = 0 if date.weekday() >= 5 else 1
    future_is_trading.append(is_trading)

# 创建扩展的协变量时间序列 (包括历史和未来)
# 对于过去协变量，我们需要提供包含历史和预测期的协变量
extended_dates = pd.DatetimeIndex(list(covariates.time_index) + future_dates)
extended_is_trading = np.concatenate([covariates.values().flatten(), np.array(future_is_trading)])

extended_covariates = TimeSeries.from_times_and_values(
    times=extended_dates,
    values=extended_is_trading.reshape(-1, 1)
)

# 预测未来
print("\n预测未来收盘价...")
future_pred_scaled = final_model.predict(
    n=forecast_horizon,
    series=scaled_features,
    past_covariates=extended_covariates
)

# 提取预测的Close列并转换为单独的时间序列
future_pred_close_from_features = future_pred_scaled.univariate_component(close_idx)
future_pred_close_ts = TimeSeries.from_times_and_values(
    times=future_pred_close_from_features.time_index,
    values=future_pred_close_from_features.values()
)

# 使用收盘价专用的缩放器反转
future_pred_close_original = scaler_close.inverse_transform(future_pred_close_ts)

# 可视化未来预测
fig = go.Figure()

# 显示历史数据（最后30个点）
historical_close = close_series  # 原始的收盘价数据
fig.add_trace(go.Scatter(
    x=historical_close.time_index[-30:],
    y=historical_close.values()[-30:].flatten(),
    mode='lines',
    name='历史数据',
    line=dict(color='black')
))

# 添加预测 - 区分交易日和非交易日
all_future_dates = future_pred_close_original.time_index
all_future_values = future_pred_close_original.values().flatten()

trading_dates = []
trading_values = []
non_trading_dates = []
non_trading_values = []

for i, date in enumerate(all_future_dates):
    if future_is_trading[i] == 1:
        trading_dates.append(date)
        trading_values.append(all_future_values[i])
    else:
        non_trading_dates.append(date)
        non_trading_values.append(all_future_values[i])

# 所有预测
fig.add_trace(go.Scatter(
    x=all_future_dates,
    y=all_future_values,
    mode='lines',
    name='未来预测',
    line=dict(dash='dash', color='blue')
))

# 交易日预测
if trading_dates:
    fig.add_trace(go.Scatter(
        x=trading_dates,
        y=trading_values,
        mode='markers',
        name='交易日预测',
        marker=dict(color='green', size=10, symbol='circle')
    ))

# 非交易日预测
if non_trading_dates:
    fig.add_trace(go.Scatter(
        x=non_trading_dates,
        y=non_trading_values,
        mode='markers',
        name='非交易日预测',
        marker=dict(color='red', size=8, symbol='x')
    ))

# 添加垂直线表示分界点
fig.add_vline(x=last_date, line_width=1, line_dash="dash", line_color="gray")
fig.add_annotation(x=last_date, y=historical_close.values()[-1][0],
            text="当前", showarrow=True, arrowhead=1, ax=-50, ay=-30)

fig.update_layout(
    title='未来10天收盘价预测 (区分交易日/非交易日)',
    xaxis_title='日期',
    yaxis_title='收盘价',
    width=900,
    height=500,
    legend=dict(x=0.01, y=0.99, orientation='h')
)
fig.show()

# 打印预测值，区分交易日和非交易日
print("\n未来10天的预测收盘价:")
for i, (date, value, is_trading) in enumerate(zip(all_future_dates, all_future_values, future_is_trading)):
    trading_status = "交易日" if is_trading == 1 else "非交易日"
    print(f"日期 {i+1} ({date.date()}, {trading_status}): {value:.2f}")

# 7. 对预测结果进行误差分析
# 计算实际值和预测值之间的加权残差
std_residuals = test_actual_close_from_features.values().flatten() - test_pred_close_from_features.values().flatten()
weighted_residuals = std_residuals * weights

# 可视化残差
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=test_actual_close_from_features.time_index,
    y=std_residuals,
    mode='lines',
    name='标准残差',
    line=dict(color='lightblue')
))
fig.add_trace(go.Scatter(
    x=test_actual_close_from_features.time_index,
    y=weighted_residuals,
    mode='lines',
    name='加权残差',
    line=dict(color='blue')
))
fig.add_hline(y=0, line_width=1, line_dash="dash", line_color="red")
fig.update_layout(
    title='预测残差分析',
    xaxis_title='日期',
    yaxis_title='残差',
    width=900,
    height=400
)
fig.show()

# 分别绘制交易日和非交易日的残差分布
trading_residuals = std_residuals[trading_indices]
non_trading_residuals = std_residuals[non_trading_indices]

# 创建残差分布直方图
fig = make_subplots(rows=1, cols=2, subplot_titles=("交易日残差分布", "非交易日残差分布"))

fig.add_trace(
    go.Histogram(
        x=trading_residuals,
        nbinsx=20,
        name='交易日',
        marker_color='green'
    ),
    row=1, col=1
)

fig.add_trace(
    go.Histogram(
        x=non_trading_residuals,
        nbinsx=20,
        name='非交易日',
        marker_color='red'
    ),
    row=1, col=2
)

fig.update_layout(
    title_text='残差分布比较',
    height=400,
    width=900,
    showlegend=False
)

fig.show()

# 计算残差统计量
trading_residual_mean = np.mean(trading_residuals)
trading_residual_std = np.std(trading_residuals)
non_trading_residual_mean = np.mean(non_trading_residuals)
non_trading_residual_std = np.std(non_trading_residuals)

print("\n残差统计分析:")
print(f"{'统计量':<20} {'交易日':<15} {'非交易日':<15}")
print(f"{'-'*50}")
print(f"{'平均残差':<20} {trading_residual_mean:<15.4f} {non_trading_residual_mean:<15.4f}")
print(f"{'残差标准差':<20} {trading_residual_std:<15.4f} {non_trading_residual_std:<15.4f}")
print(f"{'样本数量':<20} {len(trading_residuals):<15d} {len(non_trading_residuals):<15d}")

原始数据形状: (2912, 7)
原始数据前5行:
         Date         Open         High          Low        Close  \
0  2011-07-01  2767.833008  2778.667969  2752.966064  2759.362061   
1  2011-07-04  2770.939941  2813.270020  2770.939941  2812.818115   
2  2011-07-05  2812.721924  2818.141113  2799.110107  2816.354004   
3  2011-07-06  2811.814941  2811.814941  2780.729004  2810.479004   
4  2011-07-07  2813.193115  2825.123047  2793.892090  2794.267090   

     Adj Close  Volume  
0  2759.362061   92000  
1  2812.818115  122000  
2  2816.354004  110200  
3  2810.479004  103200  
4  2794.267090  116600  
使用的数据范围: 2021-06-07 00:00:00 至 2023-06-29 00:00:00
合并后的数据形状: (753, 8)
合并后的数据前5行:
        Date  is_trading_day         Open         High          Low  \
0 2021-06-07               1  3597.139893  3600.379883  3581.899902   
1 2021-06-08               1  3598.750000  3621.520020  3563.250000   
2 2021-06-09               1  3576.800049  3598.709961  3572.639893   
3 2021-06-10               1  3587.530029  

主要特征形状: (753, 8, 1)
收盘价形状: (753, 1, 1)
协变量形状: (753, 1, 1)


训练集大小: 527 (2021-06-07 00:00:00 至 2022-11-15 00:00:00)
验证集大小: 112 (2022-11-16 00:00:00 至 2023-03-07 00:00:00)
测试集大小: 114 (2023-03-08 00:00:00 至 2023-06-29 00:00:00)
开始训练多特征模型（带交易日协变量）...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


执行测试集滚动预测...


Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

测试集中交易日数量: 76, 非交易日数量: 38

交易日趋势预测详情:
日期           上一交易日真实值         当前交易日真实值         当前交易日预测值         实际趋势       预测趋势       是否匹配      
------------------------------------------------------------------------------------------
2023-03-09   3283.25          3276.09          3281.11          下跌         下跌         ✓         
2023-03-10   3276.09          3230.08          3261.34          下跌         下跌         ✓         
2023-03-13   3230.08          3268.70          3250.54          上涨         上涨         ✓         
2023-03-14   3268.70          3245.31          3219.50          下跌         下跌         ✓         
2023-03-15   3245.31          3263.31          3247.68          上涨         上涨         ✓         
2023-03-16   3263.31          3226.89          3228.34          下跌         下跌         ✓         
2023-03-17   3226.89          3250.55          3250.85          上涨         上涨         ✓         
2023-03-20   3250.55          3234.91          3208.68          下跌         下跌         ✓        


预测性能评估:
指标                   加权值             标准值            
--------------------------------------------------
MAE                  0.0318          0.0288         
RMSE                 0.0379          0.0345         
MAPE(%)              6.8906          6.2269         
ACC(%)               42.6667         N/A            

交易日与非交易日性能比较:
指标                   交易日             非交易日           
--------------------------------------------------
MAE                  27.3276         16.9085        
RMSE                 32.4165         18.8424        
MAPE(%)              0.8340          0.5162         

训练最终预测模型...


Training: |          | 0/? [00:00<?, ?it/s]


预测未来收盘价...


Predicting: |          | 0/? [00:00<?, ?it/s]


未来10天的预测收盘价:
日期 1 (2023-06-30, 交易日): 3165.54
日期 2 (2023-07-01, 非交易日): 3153.02
日期 3 (2023-07-02, 非交易日): 3146.99
日期 4 (2023-07-03, 交易日): 3103.10
日期 5 (2023-07-04, 交易日): 3081.58
日期 6 (2023-07-05, 交易日): 3073.63
日期 7 (2023-07-06, 交易日): 3061.31
日期 8 (2023-07-07, 交易日): 3061.55
日期 9 (2023-07-08, 非交易日): 3024.89
日期 10 (2023-07-09, 非交易日): 3017.07



残差统计分析:
统计量                  交易日             非交易日           
--------------------------------------------------
平均残差                 0.0178          0.0186         
残差标准差                0.0348          0.0130         
样本数量                 76              38             
