## Import所需套件

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model, load_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score
import os
import ta

## 資料讀取與處理

In [2]:
# 設定資料夾路徑
data_folder = 'data'

# 合併所有CSV文件
all_data = []
for file in os.listdir(data_folder):
    if file.endswith('.csv'):
        data = pd.read_csv(os.path.join(data_folder, file))
        all_data.append(data)

# 合併所有數據到一個DataFrame
data = pd.concat(all_data, ignore_index=True)

# 將日期列轉換為日期型別
data['Date'] = pd.to_datetime(data['Date'])

data.set_index(['Date', 'ID'], inplace=True)
data.sort_index(inplace=True)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,1_high,1_low,1_close,WOpen,MOpen,2_closs,3_closs,4_closs,5_closs,K,D,cross,second2MA,Fourth6MA,Fifth6MA
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-01-02,2302,8.24,8.28,8.11,8.11,8.24,8.24,7.98,8.03,8.00,7.96,39.39,45.39,0,8.160,8.095000,8.048333
2020-01-02,2303,16.45,16.65,16.40,16.55,16.45,16.45,16.30,16.05,16.00,15.75,31.71,37.09,0,16.500,16.316667,16.183333
2020-01-02,2329,15.00,15.55,15.00,15.25,15.00,15.00,15.00,14.75,14.60,14.75,37.86,40.55,0,15.050,14.883333,14.866667
2020-01-02,2330,332.50,339.00,332.50,339.00,332.50,332.50,339.50,332.00,329.50,329.50,56.78,49.08,1,335.000,334.250000,333.416667
2020-01-02,2337,37.85,38.70,37.45,38.00,37.85,37.85,38.00,38.95,37.60,38.50,55.64,49.45,0,37.600,37.858333,38.041667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,8131,39.30,39.30,39.00,39.00,39.05,39.15,38.85,38.85,38.75,38.75,30.56,33.21,0,39.225,39.041667,38.941667
2023-12-29,8150,43.15,43.15,42.25,42.25,42.90,41.10,42.00,41.85,42.10,41.40,41.84,44.81,0,42.625,42.483333,42.100000
2023-12-29,8261,97.00,97.70,96.40,97.60,98.00,104.00,96.50,94.30,92.10,93.00,27.24,20.09,0,97.400,95.850000,95.116667
2023-12-29,8271,61.60,62.10,61.20,61.90,60.40,60.50,61.10,59.80,58.60,57.90,63.06,49.09,0,61.650,60.833333,60.116667


In [3]:
# 價格位置指標
data['price_position'] = (data['1_close'] - data['1_low']) / (data['1_high'] - data['1_low'])

# 相對低點特徵（過去7天的價格百分位數）
data['low_point_indicator'] = data.groupby(level=1)['1_close'].transform(
    lambda x: x.rolling(window=7).apply(lambda y: y.rank(pct=True)[-1])
)

# 計算RSI
data['RSI'] = ta.momentum.RSIIndicator(data['1_close']).rsi()

# 計算MACD
macd_indicator = ta.trend.MACD(data['1_close'])
data['MACD'] = macd_indicator.macd()
data['MACD_signal'] = macd_indicator.macd_signal()
data['MACD_diff'] = macd_indicator.macd_diff()

# 計算布林帶
bollinger_indicator = ta.volatility.BollingerBands(data['1_close'])
data['bollinger_mavg'] = bollinger_indicator.bollinger_mavg()

# 填充缺失值
data.fillna(method='ffill', inplace=True)
data.fillna(method='bfill', inplace=True)

# 保留未經標準化的1_close價格
data['1_close_unscaled'] = data['1_close']

data

  lambda x: x.rolling(window=7).apply(lambda y: y.rank(pct=True)[-1])
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='bfill', inplace=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Open,1_high,1_low,1_close,WOpen,MOpen,2_closs,3_closs,4_closs,5_closs,...,Fourth6MA,Fifth6MA,price_position,low_point_indicator,RSI,MACD,MACD_signal,MACD_diff,bollinger_mavg,1_close_unscaled
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-02,2302,8.24,8.28,8.11,8.11,8.24,8.24,7.98,8.03,8.00,7.96,...,8.095000,8.048333,0.000000,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,8.11
2020-01-02,2303,16.45,16.65,16.40,16.55,16.45,16.45,16.30,16.05,16.00,15.75,...,16.316667,16.183333,0.600000,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,16.55
2020-01-02,2329,15.00,15.55,15.00,15.25,15.00,15.00,15.00,14.75,14.60,14.75,...,14.883333,14.866667,0.454545,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,15.25
2020-01-02,2330,332.50,339.00,332.50,339.00,332.50,332.50,339.50,332.00,329.50,329.50,...,334.250000,333.416667,1.000000,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,339.00
2020-01-02,2337,37.85,38.70,37.45,38.00,37.85,37.85,38.00,38.95,37.60,38.50,...,37.858333,38.041667,0.440000,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,38.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,8131,39.30,39.30,39.00,39.00,39.05,39.15,38.85,38.85,38.75,38.75,...,39.041667,38.941667,0.000000,0.571429,47.189199,-40.530269,-13.467206,-27.063063,188.4025,39.00
2023-12-29,8150,43.15,43.15,42.25,42.25,42.90,41.10,42.00,41.85,42.10,41.40,...,42.483333,42.100000,0.000000,0.142857,47.261674,-44.546144,-19.682994,-24.863150,187.5150,42.25
2023-12-29,8261,97.00,97.70,96.40,97.60,98.00,104.00,96.50,94.30,92.10,93.00,...,95.850000,95.116667,0.923077,1.000000,48.556504,-42.769452,-24.300286,-18.469167,168.3950,97.60
2023-12-29,8271,61.60,62.10,61.20,61.90,60.40,60.50,61.10,59.80,58.60,57.90,...,60.833333,60.116667,0.777778,0.857143,47.742315,-43.737919,-28.187812,-15.550107,169.8200,61.90


In [4]:
def create_labels(data):
    # 買入條件
    data['buy_condition'] = (
        (data['2_closs'] > data['second2MA'].shift(1)) &    # 第二日價格站上2MA
        (data['3_closs'] > data['WOpen'].shift(1)) &        # 第三日價格站上周開盤
        (data['cross'].shift(1) == 1) &                     # KD是否交叉
        (data['low_point_indicator'] < 0.5)                 # 相對低點
    )
    
    # 賣出條件
    data['sell_condition'] = (
        (data['1_close'] < data['MOpen']) |                 # 價格跌破月開盤
        (data['1_close'] < data['Fifth6MA']) |              # 價格跌破6MA(第五日)
        (data['1_close'] < data['Fourth6MA'])               # 價格跌破6MA(第四日)
    )
    
    # 標籤：2 買入，1 賣出，0 持有
    data['label'] = np.where(data['buy_condition'], 2, 
                            np.where(data['sell_condition'], 1, 0))
    
    return data

data = create_labels(data)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,1_high,1_low,1_close,WOpen,MOpen,2_closs,3_closs,4_closs,5_closs,...,low_point_indicator,RSI,MACD,MACD_signal,MACD_diff,bollinger_mavg,1_close_unscaled,buy_condition,sell_condition,label
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-02,2302,8.24,8.28,8.11,8.11,8.24,8.24,7.98,8.03,8.00,7.96,...,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,8.11,False,True,1
2020-01-02,2303,16.45,16.65,16.40,16.55,16.45,16.45,16.30,16.05,16.00,15.75,...,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,16.55,False,False,0
2020-01-02,2329,15.00,15.55,15.00,15.25,15.00,15.00,15.00,14.75,14.60,14.75,...,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,15.25,False,False,0
2020-01-02,2330,332.50,339.00,332.50,339.00,332.50,332.50,339.50,332.00,329.50,329.50,...,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,339.00,False,False,0
2020-01-02,2337,37.85,38.70,37.45,38.00,37.85,37.85,38.00,38.95,37.60,38.50,...,0.214286,49.431679,20.863297,8.400446,-9.560827,55.4620,38.00,False,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,8131,39.30,39.30,39.00,39.00,39.05,39.15,38.85,38.85,38.75,38.75,...,0.571429,47.189199,-40.530269,-13.467206,-27.063063,188.4025,39.00,False,True,1
2023-12-29,8150,43.15,43.15,42.25,42.25,42.90,41.10,42.00,41.85,42.10,41.40,...,0.142857,47.261674,-44.546144,-19.682994,-24.863150,187.5150,42.25,False,True,1
2023-12-29,8261,97.00,97.70,96.40,97.60,98.00,104.00,96.50,94.30,92.10,93.00,...,1.000000,48.556504,-42.769452,-24.300286,-18.469167,168.3950,97.60,False,True,1
2023-12-29,8271,61.60,62.10,61.20,61.90,60.40,60.50,61.10,59.80,58.60,57.90,...,0.857143,47.742315,-43.737919,-28.187812,-15.550107,169.8200,61.90,False,False,0


In [5]:
# 標準化
scaler = MinMaxScaler()
data[['Open', '1_high', '1_low', '1_close','K','D', 'cross', 'RSI' ,'MACD', 'bollinger_mavg']] = scaler.fit_transform(data[['Open', '1_high', '1_low', '1_close','K','D', 'cross', 'RSI' ,'MACD', 'bollinger_mavg']])

data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,1_high,1_low,1_close,WOpen,MOpen,2_closs,3_closs,4_closs,5_closs,...,low_point_indicator,RSI,MACD,MACD_signal,MACD_diff,bollinger_mavg,1_close_unscaled,buy_condition,sell_condition,label
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-02,2302,0.001328,0.001301,0.001337,0.001303,8.24,8.24,7.98,8.03,8.00,7.96,...,0.214286,0.084075,0.231699,8.400446,-9.560827,0.058719,8.11,False,True,1
2020-01-02,2303,0.002849,0.002826,0.002912,0.002881,16.45,16.45,16.30,16.05,16.00,15.75,...,0.214286,0.084075,0.231699,8.400446,-9.560827,0.058719,16.55,False,False,0
2020-01-02,2329,0.002580,0.002625,0.002646,0.002638,15.00,15.00,15.00,14.75,14.60,14.75,...,0.214286,0.084075,0.231699,8.400446,-9.560827,0.058719,15.25,False,False,0
2020-01-02,2330,0.061388,0.061554,0.062962,0.063165,332.50,332.50,339.50,332.00,329.50,329.50,...,0.214286,0.084075,0.231699,8.400446,-9.560827,0.058719,339.00,False,False,0
2020-01-02,2337,0.006812,0.006843,0.006911,0.006891,37.85,37.85,38.00,38.95,37.60,38.50,...,0.214286,0.084075,0.231699,8.400446,-9.560827,0.058719,38.00,False,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,8131,0.007081,0.006952,0.007206,0.007078,39.05,39.15,38.85,38.85,38.75,38.75,...,0.571429,0.028812,0.107923,-13.467206,-27.063063,0.324865,39.00,False,True,1
2023-12-29,8150,0.007794,0.007654,0.007823,0.007686,42.90,41.10,42.00,41.85,42.10,41.40,...,0.142857,0.030598,0.099826,-19.682994,-24.863150,0.323088,42.25,False,True,1
2023-12-29,8261,0.017768,0.017592,0.018110,0.018034,98.00,104.00,96.50,94.30,92.10,93.00,...,1.000000,0.062508,0.103408,-24.300286,-18.469167,0.284810,97.60,False,True,1
2023-12-29,8271,0.011211,0.011106,0.011423,0.011359,60.40,60.50,61.10,59.80,58.60,57.90,...,0.857143,0.042443,0.101456,-28.187812,-15.550107,0.287663,61.90,False,False,0


In [6]:
# 使用索引的Date來切分資料
train = data[data.index.get_level_values('Date').year < 2023]
test = data[data.index.get_level_values('Date').year == 2023]

In [7]:
test

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,1_high,1_low,1_close,WOpen,MOpen,2_closs,3_closs,4_closs,5_closs,...,low_point_indicator,RSI,MACD,MACD_signal,MACD_diff,bollinger_mavg,1_close_unscaled,buy_condition,sell_condition,label
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2023-01-03,2302,0.002886,0.002853,0.002912,0.002918,16.65,16.65,16.70,16.70,16.80,17.05,...,0.428571,0.029811,0.166924,-15.948289,4.683017,0.158259,16.75,False,True,1
2023-01-03,2303,0.007294,0.007353,0.007443,0.007527,40.45,40.45,41.05,41.25,42.40,44.50,...,0.571429,0.049480,0.160182,-15.680540,1.070999,0.161062,41.40,False,True,1
2023-01-03,2329,0.003052,0.003026,0.003102,0.003087,17.55,17.55,17.60,17.80,18.05,18.35,...,0.571429,0.031192,0.151415,-16.335990,-2.621801,0.154030,17.65,False,True,1
2023-01-03,2330,0.082411,0.082414,0.083954,0.084478,446.00,446.00,449.50,458.50,458.50,481.00,...,0.571429,0.334823,0.215000,-10.552736,23.133015,0.153630,453.00,False,True,1
2023-01-03,2338,0.015360,0.015807,0.015583,0.016145,84.00,84.00,89.00,87.00,88.00,90.20,...,0.857143,0.078560,0.205744,-6.844283,14.833815,0.138965,87.50,False,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,8131,0.007081,0.006952,0.007206,0.007078,39.05,39.15,38.85,38.85,38.75,38.75,...,0.571429,0.028812,0.107923,-13.467206,-27.063063,0.324865,39.00,False,True,1
2023-12-29,8150,0.007794,0.007654,0.007823,0.007686,42.90,41.10,42.00,41.85,42.10,41.40,...,0.142857,0.030598,0.099826,-19.682994,-24.863150,0.323088,42.25,False,True,1
2023-12-29,8261,0.017768,0.017592,0.018110,0.018034,98.00,104.00,96.50,94.30,92.10,93.00,...,1.000000,0.062508,0.103408,-24.300286,-18.469167,0.284810,97.60,False,True,1
2023-12-29,8271,0.011211,0.011106,0.011423,0.011359,60.40,60.50,61.10,59.80,58.60,57.90,...,0.857143,0.042443,0.101456,-28.187812,-15.550107,0.287663,61.90,False,False,0


In [8]:
# 時間序列轉換
def prepare_sequences(data, window_size=20, forecast_horizon=5):
    features = ['Open', '1_high', '1_low', '1_close', 'K', 'D', 'RSI' ,'MACD', 'bollinger_mavg',
                'price_position', 'low_point_indicator']
    
    idx = pd.IndexSlice
    X, y  = [], []
    for stock in data.index.get_level_values(1).unique():
        stock_data = data.loc[idx[:, stock], features].values
        stock_labels = data.loc[idx[:, stock], 'label'].values
        for i in range(window_size, len(stock_data) - forecast_horizon):
            X.append(stock_data[i-window_size:i])
            y.append(stock_labels[i:i+forecast_horizon])
    
    return np.array(X), np.array(y)

X, y = prepare_sequences(data)
Train_X, Train_y = prepare_sequences(train)

In [9]:
# # 將y轉換為numpy數組
# y_array = np.array(Train_y)

# 計算每個值的數量
counts = np.bincount(Train_y.flatten())

print("數量為2的元素有：", counts[2])
print("數量為1的元素有：", counts[1])
print("數量為0的元素有：", counts[0])

數量為2的元素有： 7743
數量為1的元素有： 376907
數量為0的元素有： 122295


## Model

In [10]:
def create_model(input_shape, output_steps):
    # LSTM分支
    lstm_input = tf.keras.Input(shape=input_shape)
    x1 = LSTM(50, return_sequences=True)(lstm_input)
    x1 = Dropout(0.2)(x1)
    x1 = LSTM(50, return_sequences=True)(x1)
    x1 = tf.keras.layers.GlobalAveragePooling1D()(x1)
    
    # 1D CNN分支
    cnn_input = tf.keras.Input(shape=input_shape)
    x2 = Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_input)
    x2 = MaxPooling1D(pool_size=2)(x2)
    x2 = Conv1D(filters=64, kernel_size=3, activation='relu')(x2)
    x2 = MaxPooling1D(pool_size=2)(x2)
    x2 = tf.keras.layers.Flatten()(x2)
    
    # 合併LSTM和CNN的輸出
    combined = Concatenate()([x1, x2])
    # 輸出層
    output = []
    for _ in range(output_steps):
        output.append(Dense(3, activation='softmax')(combined))
    
    model = Model(inputs=[lstm_input, cnn_input], outputs=output)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    
    return model

## 若要重新訓練模型請跑以下train程式，如要跳過請勿執行。請執行後續Load model部分

In [11]:
# 模型訓練和評估 - 2020~22為訓練集
def train_and_evaluate(X, y, epochs=20, batch_size=32):
    tscv = TimeSeriesSplit(n_splits=3)
    histories = []

    for train_index, val_index in tscv.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        
        model = create_model(input_shape=(X.shape[1], X.shape[2]), output_steps=y.shape[1])
        history = model.fit([X_train, X_train], [y_train[:, i] for i in range(y.shape[1])], 
                           epochs=epochs, batch_size=batch_size, validation_data=([X_val, X_val], [y_val[:, i] for i in range(y.shape[1])]),
                           verbose=1)
        histories.append(history)
    
    # 在訓練後保存模型
    model.save('model_202122.h5')
    
    return model, histories

model_202122, histories_202122 = train_and_evaluate(Train_X, Train_y)

Epoch 1/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - loss: 3.2269 - val_loss: 3.0865
Epoch 2/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - loss: 3.0102 - val_loss: 3.1053
Epoch 3/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - loss: 3.0083 - val_loss: 3.1585
Epoch 4/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 2.9866 - val_loss: 3.0753
Epoch 5/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 2.9622 - val_loss: 3.0411
Epoch 6/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - loss: 2.9273 - val_loss: 3.0421
Epoch 7/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 2.9059 - val_loss: 3.0287
Epoch 8/20
[1m793/793[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - loss: 2.8750 - val_loss: 3.0244
Epoch 9/20
[1m793/793[0m [32



In [12]:
# 模型訓練和評估 - 全部資料為訓練集
def train_and_evaluate(X, y, epochs=20, batch_size=32):
    tscv = TimeSeriesSplit(n_splits=3)
    histories = []

    for train_index, val_index in tscv.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        
        model = create_model(input_shape=(X.shape[1], X.shape[2]), output_steps=y.shape[1])
        history = model.fit([X_train, X_train], [y_train[:, i] for i in range(y.shape[1])], 
                           epochs=epochs, batch_size=batch_size, validation_data=([X_val, X_val], [y_val[:, i] for i in range(y.shape[1])]),
                           verbose=1)
        histories.append(history)
        
    # 在訓練後保存模型
    model.save('model_all.h5')
    
    return model, histories

model_all, histories_all = train_and_evaluate(X, y)

Epoch 1/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - loss: 3.2412 - val_loss: 3.1210
Epoch 2/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0857 - val_loss: 3.1046
Epoch 3/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0728 - val_loss: 3.1282
Epoch 4/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0297 - val_loss: 3.0817
Epoch 5/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0185 - val_loss: 3.0636
Epoch 6/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 2.9768 - val_loss: 3.0890
Epoch 7/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - loss: 2.9421 - val_loss: 3.0587
Epoch 8/20
[1m1058/1058[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 2.9276 - val_loss: 3.0640
Epoch 9/



## 直接Load 已訓練好的model請執行下段

In [17]:
model_202122 = load_model('model_202122.h5')
model_all = load_model('model_all.h5')



## 回測模型在測試集的表現

In [18]:
# 以2023資料測試20~22訓練模型
act_23 = []
def backtest_23(model, test, window_size=20):
    
    balance = 10000  # 初始資金
    positions = {}  # 持倉
    
    predictions = []
    true_labels = []
    
    features = ['Open', '1_high', '1_low', '1_close', 'K', 'D', 'RSI' ,'MACD', 'bollinger_mavg',
                'price_position', 'low_point_indicator']
    
    for i in range(window_size, len(test)-5, 5):
        stock_data = test.iloc[i-window_size:i][features].values.reshape(1, window_size, -1)
        pred = model.predict([stock_data, stock_data])
        
        action = np.argmax(pred[0])  # 轉換為 2, 1, 0
        stock_code = test.index[i][1]
        price = test.iloc[i]['1_close_unscaled']
        
        label = test.iloc[i]['label']
        true_labels.append(label)
        predictions.append(action)

        act_23.append(action)
        
        if action == 2 and stock_code not in positions:  # 買入
            shares = balance // price
            positions[stock_code] = (shares, price)
            balance -= shares * price
        elif action == 1 and stock_code in positions:  # 賣出
            shares, buy_price = positions[stock_code]
            balance += shares * price
            del positions[stock_code]
    
    # 賣出所有剩餘股票
    for stock_code, (shares, _) in positions.items():
        balance += shares * data.loc[data.index[-1][0], stock_code]['1_close_unscaled'] 
        
    # 計算準確率
    accuracy = accuracy_score(true_labels, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    
    return balance

final_balance_23 = backtest_23(model_202122, test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [19]:
# 以全部資料之最後50%的數據 回測模型

act_all = []
def backtest_all(model, test, window_size=20):
    test = test.iloc[-len(test)//2:]  # 使用最後50%的數據
    
    balance = 10000  # 初始資金
    positions = {}  # 持倉
    
    predictions = []
    true_labels = []
    
    features = ['Open', '1_high', '1_low', '1_close', 'K', 'D', 'RSI' ,'MACD', 'bollinger_mavg',
                'price_position', 'low_point_indicator']
    
    for i in range(window_size, len(test)-5, 5):
        stock_data = test.iloc[i-window_size:i][features].values.reshape(1, window_size, -1)
        # stock_data = stock_data.astype('float32')
        pred = model.predict([stock_data, stock_data])
        
        action = np.argmax(pred[0])  # 轉換為 2, 1, 0
        stock_code = test.index[i][1]
        price = test.iloc[i]['1_close_unscaled']
        
        label = test.iloc[i]['label']
        true_labels.append(label)
        predictions.append(action)

        act_all.append(action)

        if action == 2 and stock_code not in positions:  # 買入
            shares = balance // price
            positions[stock_code] = (shares, price)
            balance -= shares * price
        elif action == 1 and stock_code in positions:  # 賣出
            shares, buy_price = positions[stock_code]
            balance += shares * price
            del positions[stock_code]
    
    # 賣出所有剩餘股票
    for stock_code, (shares, _) in positions.items():
        balance += shares * data.loc[data.index[-1][0], stock_code]['1_close_unscaled'] 
        
    # 計算準確率
    accuracy = accuracy_score(true_labels, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    
    return balance

final_balance_all = backtest_all(model_all, data)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

## 最終持有資金

In [20]:
print(f"以2023資料測試之最終持有資金: {final_balance_23}")
print(f"以所有資料後50%測試之最終持有資金: {final_balance_all}")

以2023資料測試之最終持有資金: 10000
以所有資料後50%測試之最終持有資金: 10100.5


In [21]:
# 將y轉換為numpy數組
act_23 = np.array(act_23)
act_all = np.array(act_all)
# 計算每個值的數量
counts_23 = np.bincount(act_23.flatten())
counts_all = np.bincount(act_all.flatten())

print("23年預測-預測：", counts_23)
# print("23年預測-買入：", counts_23[2])
print("23年預測-賣出：", counts_23[1])
print("23年預測-持有：", counts_23[0])

print("回測-預測：", counts_all)
print("回測-買入：", counts_all[2])
print("回測-賣出：", counts_all[1])
print("回測-持有：", counts_all[0])

23年預測-預測： [ 722 6075]
23年預測-賣出： 6075
23年預測-持有： 722
回測-預測： [ 1681 12211     1]
回測-買入： 1
回測-賣出： 12211
回測-持有： 1681
