In [5]:
import os
import pandas as pd
import numpy as np
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping# データディレクトリのパス
data_dir = '../stocks/stocks/'

# Collect the path of every CSV file in the data directory. glob returns
# matches in arbitrary, OS-dependent order, so sort them to make the load
# order (and the floating-point averaging below) reproducible.
csv_files = sorted(glob.glob(os.path.join(data_dir, '*.csv')))
print(csv_files)
if not csv_files:
    # Fail early with a clear message instead of the opaque
    # "No objects to concatenate" ValueError that pd.concat([]) would raise.
    raise FileNotFoundError(f"No CSV files found in {data_dir!r}")

# One DataFrame per file, each with 'Date', 'Open' and 'Ticker' columns.
data_list = []

for file in csv_files:
    # Parse only the two columns that are used; the trailing selection pins
    # the column order to ['Date', 'Open'] regardless of the file's layout.
    df = pd.read_csv(file, usecols=['Date', 'Open'])[['Date', 'Open']]
    # Use the file name without its extension as the identifier. splitext
    # strips only the final extension, so a dotted name such as 'BRK.A.csv'
    # yields 'BRK.A' rather than 'BRK'.
    ticker = os.path.splitext(os.path.basename(file))[0]
    # assign() returns a new frame, which avoids pandas'
    # SettingWithCopyWarning when adding a column to a column subset.
    df = df.assign(Ticker=ticker)
    data_list.append(df)

# Stack all per-file frames into one long table.
all_data = pd.concat(data_list, ignore_index=True)

# Convert the date strings to datetimes so they group and order correctly.
all_data['Date'] = pd.to_datetime(all_data['Date'])

# Collapse to a single row per date by averaging 'Open' over every file that
# has that date. groupby sorts by its key by default, so the result is already
# in chronological order and no separate sort is required.
all_data = all_data.groupby('Date')['Open'].mean().reset_index()
# Create the features and the target variable
# 過去30日間の'Open'値を使用して、次の日の'Open'値を予測

def create_sequences(data, window_size):
    """Build sliding-window samples for next-step prediction.

    Sample ``i`` consists of ``data[i:i + window_size]`` as the input and
    ``data[i + window_size]`` as the target.

    Parameters
    ----------
    data : array-like
        Sequence of observations ordered along its first axis. It is
        converted with ``np.asarray``, so lists, NumPy arrays and pandas
        Series are all indexed positionally.
    window_size : int
        Number of consecutive observations in each input window.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(n_samples, window_size, ...)`` where
        ``n_samples = max(len(data) - window_size, 0)``.
    y : numpy.ndarray
        Array of shape ``(n_samples, ...)`` holding the observation that
        immediately follows each window.

    Raises
    ------
    ValueError
        If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    values = np.asarray(data)
    n_samples = max(len(values) - window_size, 0)

    # Row i of `offsets` is [i, i + 1, ..., i + window_size - 1]; indexing with
    # it gathers every window in one vectorised step. When there is too little
    # data the index array has shape (0, window_size), so X keeps its window
    # axis instead of collapsing to shape (0,).
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = values[offsets]
    # Copy so the targets do not alias the caller's array.
    y = values[window_size:].copy()
    return X, y

window_size = 30  # number of past days in each input window
data_values = all_data['Open'].values

X, y = create_sequences(data_values, window_size)

# Split into training and test sets. shuffle=False keeps the rows in their
# original order, so the final 20% of the windows becomes the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# Fit both scalers on the training portion only, then apply the learned
# statistics to the training and test portions alike.
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train.reshape(-1, 1))

# Standardise the inputs.
X_train = scaler_X.transform(X_train)
X_test = scaler_X.transform(X_test)

# Standardise the targets; the scaler expects a 2-D column, hence the reshape.
y_train = scaler_y.transform(y_train.reshape(-1, 1))
y_test = scaler_y.transform(y_test.reshape(-1, 1))

# Append a trailing axis of length 1 so the inputs have the 3-D
# (samples, window_size, 1) layout that the LSTM input shape requires.
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]
# Build the LSTM model used for pre-training
# Two stacked LSTM layers followed by a single-unit Dense output. The first
# LSTM returns its full output sequence so the second LSTM can consume it.
pretrain_model = Sequential()
pretrain_model.add(LSTM(64, input_shape=(window_size, 1), return_sequences=True))
pretrain_model.add(LSTM(32))
pretrain_model.add(Dense(1))

pretrain_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Early stopping: end training once the validation loss has not improved for
# 5 consecutive epochs and roll the model back to its best-scoring weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Pre-train the model. The callback was previously constructed but never
# passed to fit(), so training always ran the full 100 epochs and the saved
# model held the last-epoch weights rather than the best ones. Per the Keras
# documentation, validation_split holds out the last 20% of the provided
# samples for validation.
pretrain_history = pretrain_model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Save the pre-trained model to disk.
pretrain_model.save('pretrained_lstm_model.h5')

['../stocks/stocks\\A.csv', '../stocks/stocks\\AA.csv', '../stocks/stocks\\AACG.csv', '../stocks/stocks\\AAL.csv', '../stocks/stocks\\AAMC.csv', '../stocks/stocks\\AAME.csv', '../stocks/stocks\\AAN.csv', '../stocks/stocks\\AAOI.csv', '../stocks/stocks\\AAON.csv', '../stocks/stocks\\AAP.csv', '../stocks/stocks\\AAPL.csv', '../stocks/stocks\\AAT.csv', '../stocks/stocks\\AAU.csv', '../stocks/stocks\\AAWW.csv', '../stocks/stocks\\AAXN.csv', '../stocks/stocks\\AB.csv', '../stocks/stocks\\ABB.csv', '../stocks/stocks\\ABBV.csv', '../stocks/stocks\\ABC.csv', '../stocks/stocks\\ABCB.csv', '../stocks/stocks\\ABEO.csv', '../stocks/stocks\\ABEV.csv', '../stocks/stocks\\ABG.csv', '../stocks/stocks\\ABIO.csv', '../stocks/stocks\\ABM.csv', '../stocks/stocks\\ABMD.csv', '../stocks/stocks\\ABR.csv', '../stocks/stocks\\ABT.csv', '../stocks/stocks\\ABTX.csv', '../stocks/stocks\\ABUS.csv', '../stocks/stocks\\AC.csv', '../stocks/stocks\\ACA.csv', '../stocks/stocks\\ACAD.csv', '../stocks/stocks\\ACAM.csv', 