In [96]:
import tensorflow as tf
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [97]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [98]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as kb

from libs.feature_extraction import *
from libs.make_model import *

In [99]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [100]:
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, LSTM, GRU, Dropout, SimpleRNN, concatenate, Input, Flatten
from tensorflow.keras.utils import plot_model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

In [101]:
import warnings
warnings.filterwarnings(action='ignore')

import tensorflow as tf
tf.get_logger().setLevel('ERROR')

In [102]:
from matplotlib import font_manager, rc

path = 'c:/Windows/Fonts/malgun.ttf'
font_name = font_manager.FontProperties(fname=path).get_name()
rc('font',family=font_name)

plt.style.use('fivethirtyeight')
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (20,10)

In [103]:
file_names = ['KOSPI대형주', 'KOSPI중형주', 'KOSPI소형주', 'KOSPI',
             'KOSPI200', 'KOSDAQ', 'KOSDAQ150']
open_file_names = ['KOSPI']#, 'KOSPI200', 'KOSDAQ', 'KOSDAQ150']

In [104]:
def get_pred_price(df, col, shift=1, bounds=0):
    df['pred_price'] = np.where(df[col].shift(-shift) > df[col] + bounds, 1, 0)
    return df

In [105]:
def scailing_df(df):
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df)
    df_scaled = pd.DataFrame(scaled, columns=df.columns, index=df.index)
    return df_scaled, scaler

In [106]:
def windowing_dataset(feature, label, window_size):
    feature_list = []
    label_list = []
    
    for i in range(len(feature) - window_size):
        feature_list.append(np.array(feature.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i+window_size-1]))
    return np.array(feature_list), np.array(label_list)

In [107]:
def make_dataset(df, train, test, window):
    train_test_set = []
    data_list = [train, test]
    feature_list = df.columns.difference(['pred_price'])
    target = 'pred_price'
    
    for data in data_list:
        feature, label = data[feature_list], data[target]
        train_test_set.append(windowing_dataset(feature, label, window))
    return train_test_set, feature_list

In [108]:
ex_df = {}
for open_files in open_file_names:
    ex_df[open_files] = get_extraction_df(open_files, open_files, 'Add')
    ex_df[open_files] = Feature_Extraction(ex_df[open_files])

Loading...
Finish Moving Average
Loading...
Finish Volatility
Loading...
Finish Volume
Loading...
Finish Momentum
Loading...
Finish All
Number of Features: 66
Number of Datas: 3001


In [109]:
df = ex_df['KOSPI']
up_df = {}
up_df['0'] = get_pred_price(df.copy(), 'close', bounds=0)
up_df['10'] = get_pred_price(df.copy(), 'close', bounds=10)
up_df['20'] = get_pred_price(df.copy(), 'close', bounds=20)
up_df['30'] = get_pred_price(df.copy(), 'close', bounds=30)
up_df['50'] = get_pred_price(df.copy(), 'close', bounds=50)

In [110]:
train_len = int(3000 * 0.8)
test = up_df['0'][train_len:]
u,c = np.unique(test['pred_price'], return_counts=True)
dict(zip(u,c))

{0: 261, 1: 340}

In [111]:
train_len = int(3000 * 0.8)
test = up_df['10'][train_len:]
u,c = np.unique(test['pred_price'], return_counts=True)
dict(zip(u,c))

{0: 395, 1: 206}

In [112]:
train_len = int(3000 * 0.8)
test = up_df['20'][train_len:]
u,c = np.unique(test['pred_price'], return_counts=True)
dict(zip(u,c))

{0: 476, 1: 125}

In [113]:
train_len = int(3000 * 0.8)
test = up_df['30'][train_len:]
u,c = np.unique(test['pred_price'], return_counts=True)
dict(zip(u,c))

{0: 526, 1: 75}

In [114]:
train_len = int(3000 * 0.8)
test = up_df['50'][train_len:]
u,c = np.unique(test['pred_price'], return_counts=True)
dict(zip(u,c))

{0: 582, 1: 19}

In [116]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report

In [117]:
window = (1, 22, 60)
epoch = 15
batch_size = 32
dropout_rate = 0.5
for key, df in up_df.items():
    df, scaler = scailing_df(df)
    train_len = int(3000 * 0.8)
    train, test = df[:train_len], df[train_len:]
    
    print('Upper Bounds:',key)
    ttset, features = make_dataset(df, train, test,window[0])
    (short_x_train, short_y_train), (short_x_test, short_y_test) = ttset
    ttset, features = make_dataset(df, train, test, window[1])
    (mid_x_train, mid_y_train), (mid_x_test, mid_y_test) = ttset
    ttset, features = make_dataset(df, train, test, window[2])
    (long_x_train, long_y_train), (long_x_test, long_y_test) = ttset
    
    print('Short Data Shape:', end = ' ')
    print(short_x_train.shape, short_y_train.shape, short_x_test.shape, short_y_test.shape)
    print('Middle Data Shape:', end = ' ')
    print(mid_x_train.shape, mid_y_train.shape, mid_x_test.shape, mid_y_test.shape)
    print('Long Data Shape:', end = ' ')
    print(long_x_train.shape, long_y_train.shape, long_x_test.shape, long_y_test.shape)
    
    y_train = to_categorical(long_y_train, 2, dtype='int32')
    y_test = to_categorical(long_y_test, 2, dtype='int32')
    
    model, callbacks = create_model(model_n = 'multi-cnn',
                                 dropout_rate = dropout_rate, 
                                 path = 'd.h5',
                                 multi_input = True,
                                 short_x_train = short_x_train,
                                 mid_x_train = mid_x_train,
                                 long_x_train = long_x_train)
    
    short_x_train_t = short_x_train[:-(window[2] - window[0])]
    mid_x_train_t = mid_x_train[:-(window[2] - window[1])]
    short_x_test_t = short_x_test[:-(window[2] - window[0])]
    mid_x_test_t = mid_x_test[:-(window[2] - window[1])]
            
    print('Reshape Data...')
    print('Short Data Shape:', end = ' ')
    print(short_x_train_t.shape, short_x_test_t.shape)
    print('Middle Data Shape:', end = ' ')
    print(mid_x_train_t.shape, mid_x_test_t.shape)
    
    hist = model.fit([short_x_train_t, mid_x_train_t, long_x_train], y_train,
                            epochs=epoch, batch_size=batch_size,
                            validation_data=([short_x_test_t, mid_x_test_t,
                                              long_x_test], y_test),
                            callbacks=callbacks,
                            verbose=1)
    
    pred = model.predict([short_x_test_t, mid_x_test_t, long_x_test])
    y_pred = np.argmax(pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    
    print(y_pred)
    print(y_true)
    reverse = scaler.inverse_transform(test)
    reverse_df = pd.DataFrame(reverse, columns = test.columns,
                             index = test.index)
    close = reverse_df['close'].values
    close = close[window[2]:]
    y_true = y_true[:-1]
    y_pred = y_pred[:-1]
    
    print(len(y_true), len(y_pred))
    unique, counts = np.unique(y_pred, return_counts=True)
    print(dict(zip(unique, counts)))
    print(classification_report(y_true, y_pred))
    date_lists = [5,22,60,120,255,len(y_pred)]
    for date in date_lists:
        y_pred_t = y_pred.copy()
        y_true_t = y_true.copy()

        y_pred_t = y_pred_t[len(y_pred) % date:]
        y_true_t = y_true_t[(len(y_true)) % date:]
        #print(y_pred_t.shape, y_true_t.shape)
        y_pred_t = y_pred_t.reshape(len(y_pred_t) // date, date)
        y_true_t = y_true_t.reshape(len(y_true_t) // date, date)
        #print(y_pred_t.shape, y_true_t.shape)

        pred_profit_list = []
        actual_profit_list = []
        idx = 0
        for y_p, y_t in zip(y_pred_t, y_true_t):
            init_money = 1000000
            pred_m = init_money
            actual_m = init_money
            for i in range(len(y_p)):
                predict = y_p[i]
                actual = y_t[i]

                today_close = close[idx]
                next_day_close = close[idx+1]
                idx += 1
                profit_ratio = round(next_day_close / today_close, 2)

                if predict == 1:
                    pred_m *= profit_ratio
                if actual == 1:
                    actual_m *= profit_ratio
            pred_profit_list.append(pred_m / 1000000)
            actual_profit_list.append(actual_m / 1000000)

        pred_profit_list = np.array(pred_profit_list)
        actual_profit_list = np.array(actual_profit_list)
        #print(pred_profit_list.shape, actual_profit_list.shape)\
        print('Date:',date)
        print(str(round(np.mean(pred_profit_list)*100, 2)) + '%,',
              str(round(np.mean(actual_profit_list)*100, 2)) + '%')
    
    print('')

Upper Bounds: 0
Short Data Shape: (2399, 1, 66) (2399,) (600, 1, 66) (600,)
Middle Data Shape: (2378, 22, 66) (2378,) (579, 22, 66) (579,)
Long Data Shape: (2340, 60, 66) (2340,) (541, 60, 66) (541,)
Reshape Data...
Short Data Shape: (2340, 1, 66) (541, 1, 66)
Middle Data Shape: (2340, 22, 66) (541, 22, 66)
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 1 1 0 1 1
 1 1 1 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0
 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 0 0 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 0
 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 1 1 1 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1

              precision    recall  f1-score   support

           0       0.66      0.99      0.79       351
           1       0.73      0.04      0.08       189

    accuracy                           0.66       540
   macro avg       0.69      0.52      0.44       540
weighted avg       0.68      0.66      0.54       540

Date: 5
100.11%, 100.27%
Date: 22
100.46%, 100.58%
Date: 60
101.34%, 103.19%
Date: 120
101.5%, 104.02%
Date: 255
101.5%, 111.68%
Date: 540
112.62%, 131.58%

Upper Bounds: 20
Short Data Shape: (2399, 1, 66) (2399,) (600, 1, 66) (600,)
Middle Data Shape: (2378, 22, 66) (2378,) (579, 22, 66) (579,)
Long Data Shape: (2340, 60, 66) (2340,) (541, 60, 66) (541,)
Reshape Data...
Short Data Shape: (2340, 1, 66) (541, 1, 66)
Middle Data Shape: (2340, 22, 66) (541, 22, 66)
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[0 0 0 0 1 1 1 0 0 1 1 0 0 0 0 0 0 

540 540
{0: 540}
              precision    recall  f1-score   support

           0       0.87      1.00      0.93       469
           1       0.00      0.00      0.00        71

    accuracy                           0.87       540
   macro avg       0.43      0.50      0.46       540
weighted avg       0.75      0.87      0.81       540

Date: 5
100.0%, 100.13%
Date: 22
100.0%, 100.18%
Date: 60
100.0%, 101.52%
Date: 120
100.0%, 100.22%
Date: 255
100.0%, 103.71%
Date: 540
100.0%, 114.09%

Upper Bounds: 50
Short Data Shape: (2399, 1, 66) (2399,) (600, 1, 66) (600,)
Middle Data Shape: (2378, 22, 66) (2378,) (579, 22, 66) (579,)
Long Data Shape: (2340, 60, 66) (2340,) (541, 60, 66) (541,)
Reshape Data...
Short Data Shape: (2340, 1, 66) (541, 1, 66)
Middle Data Shape: (2340, 22, 66) (541, 22, 66)
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[0 0 0 0 0 0 0 0 0 0 0