In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:

def Nomalizaion_datas(df):
        # 훈련시킬 데이터와, 학습후 결과를 확인할 데이터 분리
    df = df.loc[df['Date']<="2021"]
    df.sort_index(ascending=False).reset_index(drop=True)

    #정규화하기
    scaler = MinMaxScaler()
    scale_cols = ['시가', '고가', '저가', '현재가', '거래량', '거래대금', 'D', 'G',
        'O', 'Dow', 'kosdaq', 'nasdaq', 'S&P500']
    df_scaled = scaler.fit_transform(df[scale_cols])
    df_scaled = pd.DataFrame(df_scaled)
    df_scaled.columns = scale_cols
    
    # 테스트사이즈, 윈도우사이즈 설정
    TEST_SIZE = 200
    WINDOW_SIZE = 20

    #--------------------------------------------------------
    train = df_scaled[:-TEST_SIZE]
    test = df_scaled[-TEST_SIZE:]

    test.reset_index(drop=True, inplace=True)
    
    return  train, test


def make_dataset(data, label, window_size=20):
    feature_list = []
    label_list = []
    for i in range(len(data) - window_size):
        feature_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i+window_size]))
    
    return np.array(feature_list), np.array(label_list)


 
def training_target(train):
    
    feature_cols = ['시가', '고가', '저가', '거래량', '거래대금', 'D', 'G',
            'O', 'kosdaq']
    label_cols = ['현재가']

    train_feature = train[feature_cols]
    train_label = train[label_cols]

    train_feature, train_label = make_dataset(train_feature, train_label, 20)

    x_train, x_valid, y_train, y_valid = train_test_split(train_feature, train_label, test_size=0.2)
    
    return x_train, x_valid, y_train, y_valid, feature_cols, label_cols, train_feature, train_label

  
def test_feature_label(feature_cols, label_cols):
    test_feature = test[feature_cols]
    test_label = test[label_cols]

    X_test, y_test = make_dataset(test_feature, test_label, 20)
    
    return X_test, y_test, test_feature, test_label

In [4]:
import tensorflow as test_feature
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM , Conv1D, Dropout, GRU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

In [5]:
import tensorflow as tf
import keras_tuner as kt
import IPython

In [6]:
def model_bulider(hp):
    model = Sequential()
    # 첫번째 층에 있는 유닛 수 조정하기
    hp_unit1 = hp.Int("units1", min_value = 32, max_value = 512, step=32)
    hp_unit2 = hp.Int("units2", min_value = 32, max_value = 512, step=32)
    hp_unit3 = hp.Int("units3", min_value = 32, max_value = 512, step=32)
    hp_unit4 = hp.Int("units4", min_value = 32, max_value = 512, step=32)
    model.add(GRU(units = hp_unit1, 
               input_shape=(train_feature.shape[1], train_feature.shape[2]), 
               activation='relu', 
               return_sequences=True))
    model.add(GRU(units = hp_unit2,
               input_shape=(train_feature.shape[1], train_feature.shape[2]), 
               activation='relu', 
               return_sequences=True))
    model.add(GRU(units = hp_unit3,  
               activation='relu', 
               return_sequences=True))
    model.add(LSTM(units = hp_unit4, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(1,activation='relu',))

    # 학습속도 조정하기
    #0.01, 0.001, 0.0001 ~ 0.05, 0.005, 0.0005
    hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4, 2e-2, 2e-3, 2e-4, 3e-2, 3e-3, 3e-4, 4e-2, 4e-3, 4e-4, 5e-2, 5e-3, 5e-4])
    optimizer_2 = Adam(learning_rate= hp_learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer_2, metrics=["accuracy"])

    return model

In [9]:
path = 'data_kosdaq_price_OGD/'
file_list = os.listdir(path)
file_list_py = [file for file in file_list if file.endswith('.xlsx')]

In [10]:
import sys

In [15]:
sys.stdout = open('2번째.txt', 'w')
for i in file_list_py:
    df = pd.read_excel(path + i)
  
    train, test= Nomalizaion_datas(df)
    x_train, x_valid, y_train, y_valid, feature_cols, label_cols, train_feature, train_label = training_target(train)
    X_test, y_test, test_feature, test_label = test_feature_label(feature_cols, label_cols)
    tuner = kt.Hyperband(model_bulider,
                     objective = "val_accuracy",
                     max_epochs = 10,
                     factor = 3,
                     directory = f"my_dir_0{i}",
                     project_name = "intro_to_kt")
    with tf.device("/device:GPU:0"):
        tuner.search(x_train, y_train, epochs= 5, validation_data = (x_valid, y_valid))
        best_hps =tuner.get_best_hyperparameters(num_trials= 1)[0]
        print("----------------------------------------------------------------------------------------------------------")
        print(f"파일명: {i}", "최적 유닛수1 : {}, 최적 유닛수2 : {}, 최적 유닛수3 : {}, 최적 유닛수4 : {}, 최적 학습속도 : {}".format(best_hps.get("units1"), best_hps.get("units2"), best_hps.get("units3"), best_hps.get("units4"), best_hps.get("learning_rate")))
        # globals()["model" + i] = tuner.hypermodel.build(best_hps)

sys.stdout.close()

INFO:tensorflow:Reloading Oracle from existing project my_dir_0GST.xlsx.xlsx/intro_to_kt/oracle.json
INFO:tensorflow:Reloading Tuner from my_dir_0GST.xlsx.xlsx/intro_to_kt/tuner0.json
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Reloading Oracle from existing project my_dir_0덕산네오룩스.xlsx.xlsx/intro_to_kt/oracle.json
INFO:tensorflow:Reloading Tuner from my_dir_0덕산네오룩스.xlsx.xlsx/intro_to_kt/tuner0.json
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Reloading Oracle from existing project my_dir_0동진쌔미캠.xlsx.xlsx/intro_to_kt/oracle.json
INFO:tensorflow:Reloading Tuner from my_dir_0동진쌔미캠.xlsx.xlsx/intro_to_kt/tuner0.json
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Reloading Oracle from existing project my_dir_0리노공업.xlsx.xlsx/intro_to_kt/oracle.json
INFO:tensorflow:Reloading Tuner from my_dir_0리노공업.xlsx.xlsx/intro_to_kt/tuner0.json
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Reloading Oracle from existing project

In [12]:
class ClearTraingOutput(tf.keras.callbacks.Callback):
    def on_train_end(*args, **kwargs):
        IPython.display.clear_output(wait=True)

In [38]:
print(f"파일명: {i}", "최적 유닛수1 : {}, 최적 유닛수2 : {}, 최적 유닛수3 : {}, 최적 유닛수4 : {}, 최적 학습속도 : {}".format(best_hps.get("units1"), best_hps.get("units2"), best_hps.get("units3"), best_hps.get("units4"), best_hps.get("learning_rate")))

파일명: 하나머티리얼즈.xlsx.xlsx 최적 유닛수1 : 416, 최적 유닛수2 : 352, 최적 유닛수3 : 448, 최적 유닛수4 : 480, 최적 학습속도 : 0.02


In [39]:
model1 = tuner.hypermodel.build(best_hps)

In [40]:
model1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_3 (GRU)                  (None, 20, 416)           532896    
_________________________________________________________________
gru_4 (GRU)                  (None, 20, 352)           813120    
_________________________________________________________________
gru_5 (GRU)                  (None, 20, 448)           1077888   
_________________________________________________________________
lstm_1 (LSTM)                (None, 480)               1783680   
_________________________________________________________________
dropout_1 (Dropout)          (None, 480)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 481       
Total params: 4,208,065
Trainable params: 4,208,065
Non-trainable params: 0
____________________________________________