In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Conv1D, Flatten, MultiHeadAttention, Input
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Load the real market data from disk.
data = pd.read_csv('stock_futures_data.csv')

# Preprocessing: parse the date column and make it the row index.
data['日期'] = pd.to_datetime(data['日期'])
data = data.set_index('日期')

# Pick the required feature columns directly: the stock open/high/low/close
# prices and volume, followed by the futures open/high/low/close prices.
# Column order matters: later code addresses the stock close price by
# position (index 3).
features = data[[
    '股票開盤價', '股票最高價', '股票最低價', '股票收盤價', '股票交易量',
    '期貨開盤', '期貨最高', '期貨最低', '期貨收盤',
]]

# Feature scaling (MinMaxScaler with its default settings, fitted on all rows).
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features.to_numpy())

# 基礎模型定義
class BaseModels:
    """Factory helpers for the base forecasters that feed the ensemble.

    ``arima_garch`` fits and forecasts in one call.  The four network
    builders only construct and compile a Keras model; fitting is left to
    the caller.  Each builder accepts either the training array itself
    (shape ``(samples, timesteps, ...)``) or a ``(timesteps, channels)``
    input-shape tuple.
    """

    @staticmethod
    def _timesteps(train):
        """Return the sequence length implied by ``train``.

        Args:
            train: An array-like exposing ``.shape`` (the second axis is
                used), or a plain ``(timesteps, channels)`` sequence.

        Returns:
            The number of timesteps as an ``int``.

        Raises:
            ValueError: If ``train`` has a ``.shape`` with fewer than two
                dimensions.
        """
        shape = getattr(train, 'shape', None)
        if shape is None:
            # Not an array: treat it as an input-shape tuple such as (9, 1).
            return int(train[0])
        if len(shape) < 2:
            raise ValueError(
                f"expected training data with at least 2 dimensions, got shape {tuple(shape)}"
            )
        return int(shape[1])

    @staticmethod
    def arima_garch(train):
        """Fit ARIMA(5, 1, 0) plus GARCH(1, 1) on its residuals; forecast one step.

        Args:
            train: Univariate series (1-D array or pandas Series) to fit on.

        Returns:
            A float ndarray holding the one-step ARIMA forecast plus the
            one-step GARCH mean forecast of the residuals.
        """
        arima_fit = ARIMA(train, order=(5, 1, 0)).fit()
        garch_fit = arch_model(arima_fit.resid, vol='Garch', p=1, q=1).fit()

        # Convert both forecasts to plain float arrays before adding.  With
        # pandas input the ARIMA forecast carries a time index while the
        # GARCH mean row is indexed by horizon label, so a pandas-level sum
        # would align on mismatched labels and yield NaN.
        arima_next = np.asarray(arima_fit.forecast(steps=1), dtype=float).ravel()
        garch_next = np.asarray(
            garch_fit.forecast(horizon=1).mean.iloc[-1], dtype=float
        ).ravel()
        return arima_next + garch_next

    @staticmethod
    def lstm_model(train):
        """Build and compile a single-layer LSTM regressor (MSE loss, Adam)."""
        model = Sequential([
            LSTM(50, activation='relu', input_shape=(BaseModels._timesteps(train), 1)),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    @staticmethod
    def gru_model(train):
        """Build and compile a single-layer GRU regressor (MSE loss, Adam)."""
        model = Sequential([
            GRU(50, activation='relu', input_shape=(BaseModels._timesteps(train), 1)),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    @staticmethod
    def tcn_model(train):
        """Build and compile a one-layer Conv1D regressor (MSE loss, Adam)."""
        model = Sequential([
            Conv1D(64, kernel_size=3, activation='relu',
                   input_shape=(BaseModels._timesteps(train), 1)),
            Flatten(),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    @staticmethod
    def transformer_model(train):
        """Build and compile a self-attention regressor (MSE loss, Adam)."""
        input_layer = Input(shape=(BaseModels._timesteps(train), 1))
        # Self-attention: the input serves as both query and value.
        attention = MultiHeadAttention(num_heads=4, key_dim=2)(input_layer, input_layer)
        flatten = Flatten()(attention)
        output = Dense(1)(flatten)
        model = Model(inputs=input_layer, outputs=output)
        model.compile(optimizer='adam', loss='mse')
        return model

# 集成學習模型
class EnsembleModel:
    """Second-stage learner: a random forest and an XGBoost regressor, averaged."""

    def __init__(self):
        """Create both regressors with their library-default settings."""
        self.rf_model = RandomForestRegressor()
        self.xgb_model = XGBRegressor()

    def train(self, X, y):
        """Fit both regressors on the same ``X`` / ``y``."""
        for regressor in (self.rf_model, self.xgb_model):
            regressor.fit(X, y)

    def predict(self, X):
        """Return the mean of the two regressors' predictions for ``X``."""
        forest_output = self.rf_model.predict(X)
        boosted_output = self.xgb_model.predict(X)
        combined = forest_output + boosted_output
        return combined / 2

# Main pipeline
num_stocks = 3  # number of evaluation rounds
num_models = 5  # number of base models feeding the ensemble (1..5)
results = []

# Position of the stock close price ('股票收盤價') in the `features` column list.
_TARGET_COL = 3


def _arima_garch_walk_forward(close, start, stop):
    """Return one-step ARIMA-GARCH forecasts of ``close[i + 1]`` for i in [start, stop).

    Each forecast is fitted only on ``close[:i + 1]``, so no future value
    leaks into it.  This refits the model once per step and is therefore
    slow on long series.
    """
    return np.array([
        float(np.ravel(BaseModels.arima_garch(close[:i + 1]))[0])
        for i in range(start, stop)
    ])


def _evaluate_stacked_ensemble(scaled, n_models=5, target_col=_TARGET_COL):
    """Train base models plus the ensemble on ``scaled``; return the test MSE.

    The rows are split chronologically (no shuffling) into three parts:
    the first 60% trains the neural base models, the next 20% trains the
    ensemble on the base models' out-of-sample predictions, and the last
    20% is held out for scoring.

    Args:
        scaled: 2-D array of scaled features, one row per time step.
        n_models: How many base models to use, in the order ARIMA-GARCH,
            LSTM, GRU, Conv1D, attention.
        target_col: Column of ``scaled`` to predict one step ahead.

    Returns:
        Mean squared error of the ensemble on the held-out rows, in the
        scaled units of ``scaled``.

    Raises:
        ValueError: If ``n_models`` is outside 1..5 or there are too few
            rows to form three non-empty segments.
    """
    if not 1 <= n_models <= 5:
        raise ValueError(f"n_models must be between 1 and 5, got {n_models}")

    # Sample i pairs the features of row i with the target of row i + 1.
    X_all = scaled[:-1, :]
    y_all = scaled[1:, target_col]
    n_samples = len(X_all)
    base_end = int(n_samples * 0.6)
    meta_end = int(n_samples * 0.8)
    if base_end < 1 or meta_end <= base_end or meta_end >= n_samples:
        raise ValueError(f"not enough rows ({n_samples}) for a 60/20/20 split")

    # The networks expect (samples, timesteps, channels); each feature is
    # fed as one timestep with a single channel.
    X_seq = X_all.reshape(n_samples, X_all.shape[1], 1)
    X_base = X_seq[:base_end]
    y_base = y_all[:base_end].reshape(-1, 1)
    X_stack = X_seq[base_end:]  # meta-train rows followed by test rows

    # One column of predictions per base model, aligned with X_stack.
    base_columns = [
        _arima_garch_walk_forward(scaled[:, target_col], base_end, n_samples)
    ]
    network_builders = (
        BaseModels.lstm_model,
        BaseModels.gru_model,
        BaseModels.tcn_model,
        BaseModels.transformer_model,
    )
    for build in network_builders[:n_models - 1]:
        # Passing the 3-D array lets the builder read the sequence length
        # from its second axis.
        model = build(X_base)
        model.fit(X_base, y_base, epochs=5, verbose=0)
        base_columns.append(model.predict(X_stack, verbose=0).ravel())

    # Ensemble layer: fit on the meta-train rows, score on the test rows.
    meta_features = np.column_stack(base_columns)
    n_meta = meta_end - base_end
    ensemble = EnsembleModel()
    ensemble.train(meta_features[:n_meta], y_all[base_end:meta_end])
    predictions = ensemble.predict(meta_features[n_meta:])
    return mean_squared_error(y_all[meta_end:], predictions)


# NOTE(review): only one CSV is loaded, so every round below evaluates the
# same series; results differ only through random model initialisation.
# Load per-stock data here if separate datasets were intended.
for idx in range(num_stocks):
    print(f"Processing dataset {idx + 1}/{num_stocks}")
    results.append(_evaluate_stacked_ensemble(scaled_features, n_models=num_models))

print("MSE for each dataset:", results)


ModuleNotFoundError: No module named 'arch'