In [None]:
import pandas as pd
import numpy as np
import keras
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Conv1D, \
    BatchNormalization, Dropout, MaxPooling1D, Flatten
import tensorflow as tf
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from Indicator import *
from DataScaler import *

In [None]:
class Network:
    """Common hyper-parameter holder that the model wrappers below inherit from.

    Parameters
    ----------
    input_dim : int
        Number of input features; stored as ``self.input_dim``.
    output_dim : int
        Stored as ``self.output_dim``; not read anywhere in this class.
    lr : float
        Learning rate; stored as ``self.lr``.
    activation : str
        Activation identifier; stored as ``self.activation``.
    loss : str or object
        ``"mse"`` maps to ``tf.keras.losses.MeanSquaredError()`` and ``"BC"``
        maps to ``tf.keras.losses.BinaryCrossentropy()``. Any other value
        (another Keras loss name, a loss instance, a callable) is stored
        unchanged so that it can be handed to ``compile`` as-is.
    """

    def __init__(self,input_dim = 0, output_dim = 0, lr = 0.001,
                activation = "relu", loss = "BC"):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.lr = lr
        self.activation = activation
        if loss == "mse":
            self.loss = tf.keras.losses.MeanSquaredError()
        elif loss == "BC":
            self.loss = tf.keras.losses.BinaryCrossentropy()
        else:
            # Previously an unrecognised value left ``self.loss`` undefined and
            # subclasses failed later with AttributeError; pass it through instead.
            self.loss = loss

    def predict(self,x):
        """Placeholder for subclasses to override; this base version returns None."""
        pass

class DNN(Network):
    """Fully connected binary classifier: Dense/Dropout stack ending in one sigmoid unit."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Network`` and build plus compile the Keras model."""
        super().__init__(*args, **kwargs)
        self.model = self.get_network_head()
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)
        metrics = [tf.keras.metrics.BinaryAccuracy()]
        self.model.compile(optimizer=optimizer, metrics=metrics, loss=self.loss)

    def get_network_head(self):
        """Return the uncompiled Sequential model (256 -> 128 -> 64 -> 32 -> 1)."""
        net = keras.models.Sequential()
        # NOTE(review): this first layer is created without an activation, unlike
        # the hidden layers that follow -- confirm that this is intended.
        net.add(keras.layers.Dense(256, input_shape=(self.input_dim,)))
        for width in (128, 64, 32):
            net.add(keras.layers.Dropout(0.1))
            net.add(keras.layers.Dense(width, activation=self.activation))
        net.add(keras.layers.Dropout(0.1))
        net.add(keras.layers.Dense(1, activation="sigmoid"))
        return net

    def train_on_batch(self, x, y):
        """Fit the model on ``x``/``y`` for 5 epochs with a batch size of 1024."""
        self.model.fit(x, y, epochs=5, batch_size=1024)

    def predict(self, sample):
        """Reshape ``sample`` to ``(-1, input_dim)`` and return the model's predictions."""
        batch = np.array(sample).reshape(-1, self.input_dim)
        return self.model.predict(batch)
    
class LSTMNetwork(Network):
    """LSTM-based binary classifier that consumes sliding windows of rows.

    Parameters
    ----------
    num_steps : int
        Number of time steps per input window; sizes the LSTM input and is the
        default window length used by ``make_dataset``.
    *args, **kwargs
        Forwarded to ``Network.__init__``.
    """

    def __init__(self, num_steps, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_steps = num_steps
        self.model = self.get_network_head()
        self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.lr),
                           metrics=[tf.keras.metrics.BinaryAccuracy()],
                            loss=self.loss)

    def make_dataset(self, data, label, window_size=None):
        """Cut ``data`` into overlapping windows and pair each with a label.

        Window ``i`` covers rows ``i .. i + window_size - 1`` and is paired with
        the label of its last row (``i + window_size - 1``). The loop runs for
        ``len(data) - window_size`` windows.

        Parameters
        ----------
        data : DataFrame or array-like
            Per-row features.
        label : Series or array-like
            Per-row labels, same length as ``data``.
        window_size : int, optional
            Rows per window. Defaults to ``self.num_steps`` so that the windows
            always match the input shape the network was built with (the
            previous hard-coded default of 20 ignored ``num_steps``).

        Returns
        -------
        tuple of numpy.ndarray
            ``(features, labels)`` with one entry per window.
        """
        if window_size is None:
            window_size = self.num_steps
        # Convert once up front instead of materialising a new array per window.
        features = np.asarray(data)
        targets = np.asarray(label)
        feature_list = []
        label_list = []
        print(">>LSTM Data transpose")
        for start in tqdm(range(len(features) - window_size)):
            stop = start + window_size
            feature_list.append(features[start:stop])
            label_list.append(targets[stop - 1])
        return np.array(feature_list), np.array(label_list)

    def get_network_head(self):
        """Return the uncompiled Sequential model: LSTM(256) followed by a Dense/Dropout stack."""
        model = keras.models.Sequential()
        model.add(LSTM(256, input_shape = (self.num_steps,self.input_dim)))
        model.add(keras.layers.Dropout(0.1))
        model.add(keras.layers.Dense(128,activation=self.activation))
        model.add(keras.layers.Dropout(0.1))
        model.add(keras.layers.Dense(64,activation=self.activation))
        model.add(keras.layers.Dropout(0.1))
        model.add(keras.layers.Dense(32,activation=self.activation))
        model.add(keras.layers.Dropout(0.1))
        model.add(keras.layers.Dense(1,activation="sigmoid"))
        return model

    def train_on_batch(self, x, y):
        """Window ``x``/``y`` with ``make_dataset`` and fit for 1 epoch (batch size 1024)."""
        x,y = self.make_dataset(x,y)
        self.model.fit(x, y, batch_size = 1024, epochs = 1)

    def predict(self, x):
        """Return model predictions for ``x``, which must already be windowed."""
        pred = self.model.predict(x)
        return pred
class LogisticNetwork(Network):
    """Small Keras classifier: a linear Dense(256) layer followed by one sigmoid unit.

    It is compiled with binary cross-entropy and binary accuracy, both of which
    expect probabilities in [0, 1] with their default settings.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Network`` and build plus compile the Keras model."""
        super().__init__(*args, **kwargs)
        self.model = self.get_network_head()
        # Use the configured learning rate (default 0.001) instead of a hard-coded one.
        self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.lr),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy()])

    def get_network_head(self):
        """Return the uncompiled Sequential model."""
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(256,input_shape = (self.input_dim,)))
        # The output must be squashed to a probability: BinaryCrossentropy() is
        # used with from_logits=False, and the predictions are later compared
        # against probability thresholds.
        model.add(keras.layers.Dense(1, activation="sigmoid"))
        return model

    def train_on_batch(self,x,y):
        """Fit the model on ``x``/``y`` for 10 epochs with a batch size of 256."""
        self.model.fit(x,y,batch_size = 256, epochs = 10)
        
class Xgboost(Network):
    """Wrapper that exposes an ``XGBClassifier`` through the same interface as the Keras models."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Network`` and create the classifier."""
        super().__init__(*args, **kwargs)
        self.model = self.get_network_head()

    def get_network_head(self):
        """Return a fresh ``XGBClassifier`` with ``max_depth`` set to 10."""
        classifier = XGBClassifier(max_depth=10)
        return classifier

    def train_on_batch(self, x, y):
        """Fit the classifier on ``x``/``y``."""
        self.model.fit(x, y)
        

In [None]:
# Read the labelled CSV and discard the "Unnamed: 0" column.
data = pd.read_csv("labeled_data.csv").drop(columns=["Unnamed: 0"])

# Indicator configuration handed to DataManage: one single-key dict per
# indicator, mapping the indicator name to its keyword settings.
parameter = [
    {"rsi": {"period": 14}},
    {"ma": {"period": 7}},
    {"ma": {"period": 25}},
    {"ema": {"period": 7}},
    {"ema": {"period": 25}},
    {"stochastic": {"n": 14, "m": 5, "t": 5}},
    {"bb": {"length": 21, "std": 2}},
    {"kdj": {}},
    {"macd": {"fast_period": 12, "slow_period": 26}},
]

DataManageBot = DataManage(data, parameter=parameter)
data = DataManageBot.get_data()
# Not the last expression of the cell, so this preview is not rendered.
data.head()

# Feature matrix (everything except the timestamp and the target) and target.
x = data.drop(columns=["datetime", "label"])
y = data["label"]
print(x.shape, y.shape)

In [None]:
# The last `test_size` rows are held out for testing.
# Presumably 1440 rows per day x 30 days -- TODO confirm the sampling interval.
test_size = 1440 * 30

X = data.drop(columns=["label", "datetime"])
Y = data["label"]
# NOTE(review): scaling is applied to the full frame before the split; if
# Data_StandardScaler fits statistics on its input, the held-out rows
# influence the training features -- confirm.
X = Data_StandardScaler(X)

# Chronological split: everything before the hold-out window trains the models.
x_train, y_train = X[:-test_size], Y[:-test_size]
x_test, y_test = X[-test_size:], Y[-test_size:]
x_train.shape, y_train.shape

In [None]:
class ensembleModel:
    """Four binary classifiers (DNN, LSTM, logistic, XGBoost) trained together
    and evaluated by unanimous, high-confidence vote.

    Parameters
    ----------
    num_step : int
        Window length (number of rows) handed to the LSTM model.
    input_dim : int
        Number of feature columns handed to every model.
    """

    def __init__(self,num_step, input_dim):
        self.DNNModel = DNN(input_dim = input_dim)
        self.LSTMModel = LSTMNetwork(input_dim = input_dim, num_steps = num_step)
        self.LRModel = LogisticNetwork(input_dim = input_dim)
        self.XGBoostModel = Xgboost(input_dim = input_dim)
        # Raw predictions from the most recent predict_and_evaluation call.
        self.DNNPredict = None
        self.LRPredict = None
        self.XGBoostPredict = None
        self.LSTMPredict = None

    def models_fit(self,x,y):
        """Train the four models one after another on the same ``x``/``y``."""
        print(">> DNN Training...")
        self.DNNModel.train_on_batch(x,y)
        print(">> Logistic Training...")
        self.LRModel.train_on_batch(x,y)
        print(">> XGBoost Training...")
        self.XGBoostModel.train_on_batch(x,y)
        print(">> LSTM Training...")
        self.LSTMModel.train_on_batch(x,y)

    def predict_and_evaluation(self, x, threshold = 0, y = None):
        """Predict on ``x`` with every model and print the accuracy of the unanimous votes.

        A row is labelled 1 when all four predictions exceed 0.8, 0 when all
        four are below 0.2, and is skipped otherwise. Accuracy is computed over
        the labelled rows only. Raw predictions are kept on ``self.*Predict``.

        Parameters
        ----------
        x : DataFrame
            Feature rows to predict on (previously ignored in favour of the
            notebook-level ``x_test``).
        threshold : float
            Accepted for interface compatibility but currently unused; the
            cut-offs are fixed at 0.8 / 0.2.
            NOTE(review): the original comment "0 <= threshold < 0.5" suggests
            it was meant to be a margin around 0.5 -- confirm before wiring in.
        y : Series, optional
            True labels for ``x``. When omitted, the module-level ``y_test`` is
            used, which is what the original implementation always read.

        Raises
        ------
        ValueError
            If no labels are available or their length differs from ``x``.
        """
        if y is None:
            # Legacy fallback: the original body read the global ``y_test``.
            y = globals().get("y_test")
            if y is None:
                raise ValueError("predict_and_evaluation needs labels: pass y=...")

        self.DNNPredict = self.DNNModel.predict(x)
        self.LRPredict = self.LRModel.model.predict(x)
        self.XGBoostPredict = self.XGBoostModel.model.predict(x)
        # Build windows with the length the LSTM was constructed for.
        window = self.LSTMModel.num_steps
        lstm_x, _ = self.LSTMModel.make_dataset(x, y, window_size=window)
        self.LSTMPredict = self.LSTMModel.predict(lstm_x)

        dnn = np.asarray(self.DNNPredict, dtype=float).reshape(-1)
        lr = np.asarray(self.LRPredict, dtype=float).reshape(-1)
        xgb = np.asarray(self.XGBoostPredict, dtype=float).reshape(-1)
        lstm_raw = np.asarray(self.LSTMPredict, dtype=float).reshape(-1)

        n_rows = len(dnn)
        truth = np.asarray(y).reshape(-1)
        if len(truth) != n_rows:
            raise ValueError("x and y must contain the same number of rows")

        # Window k ends on row k + window - 1, so that is the row its prediction
        # belongs to. Rows without an LSTM prediction stay NaN; NaN fails both
        # comparisons below, so those rows are skipped rather than mis-voted.
        lstm = np.full(n_rows, np.nan)
        first = window - 1
        usable = max(0, min(len(lstm_raw), n_rows - first))
        lstm[first:first + usable] = lstm_raw[:usable]

        votes = np.vstack([dnn, lr, xgb, lstm])
        all_up = (votes > 0.8).all(axis=0)
        all_down = (votes < 0.2).all(axis=0)
        result_label = np.where(all_up, 1, np.where(all_down, 0, -1))

        decided = result_label != -1
        cnt = int(decided.sum())  # rows with a confident, unanimous vote
        if cnt == 0:
            # Avoid dividing by zero when every row is skipped.
            print("accuracy : n/a (no confident predictions)")
            return
        ck = int((result_label[decided] == truth[decided]).sum())  # correct votes
        print("accuracy :",round(ck/cnt * 100,2),"%")

In [None]:
# Build the ensemble with a 20-row LSTM window and one input per feature
# column, then train all of its models on the training split.
ensemble = ensembleModel(num_step=20, input_dim=x_train.shape[1])
ensemble.models_fit(x=x_train, y=y_train)

In [None]:
# Row-wise models predict directly on the held-out feature rows.
DNNPredict = ensemble.DNNModel.predict(x_test)
LRPredict = ensemble.LRModel.model.predict(x_test)
XGBoostPredict = ensemble.XGBoostModel.model.predict(x_test)

# The LSTM needs the held-out rows cut into windows before it can predict.
(LSTM_x_test, LSTM_y_test) = ensemble.LSTMModel.make_dataset(data=x_test, label=y_test)
LSTMPredict = ensemble.LSTMModel.predict(LSTM_x_test)

In [None]:
# Flatten every prediction array to one value per row.
DNNPredict = DNNPredict.reshape(-1,)
LRPredict = LRPredict.reshape(-1,)
XGBoostPredict = XGBoostPredict.reshape(-1,)

# Align the LSTM predictions with the per-row predictions above.
# Window k covers rows k .. k + window - 1 and predicts the label of its last
# row, so prediction k belongs to row k + window - 1 (the old padding of
# `window` leading entries shifted every prediction one row too late).
# Rows without an LSTM prediction are NaN rather than -1: NaN fails both the
# "> hi" and "< lo" tests in the voting cell, whereas -1 passed the "< lo" test.
lstm_flat = np.asarray(LSTMPredict, dtype=float).reshape(-1,)
if len(lstm_flat) != len(DNNPredict):
    lstm_offset = LSTM_x_test.shape[1] - 1  # window length minus one
    lstm_aligned = np.full(len(DNNPredict), np.nan)
    lstm_aligned[lstm_offset:lstm_offset + len(lstm_flat)] = lstm_flat
    LSTMPredict = lstm_aligned
else:
    # Already aligned (the cell was re-run); keep the values as they are.
    LSTMPredict = lstm_flat

In [None]:
# DNN-only labelling: 1 above 0.85, 0 below 0.15, otherwise -1 (no decision).
result_label = [
    1 if score > 0.85 else 0 if score < 0.15 else -1
    for score in DNNPredict
]
# Number of rows left without a decision.
cnt = result_label.count(-1)
print(cnt)

In [None]:
# Unanimous vote across the four models: 1 when every model is confidently
# positive, 0 when every model is confidently negative, otherwise -1.
result_label = [
    1 if (LSTMPredict[i] > 0.9 and DNNPredict[i] > 0.9
          and LRPredict[i] > 0.9 and XGBoostPredict[i] == 1)
    else 0 if (LSTMPredict[i] < 0.1 and DNNPredict[i] < 0.1
               and LRPredict[i] < 0.1 and XGBoostPredict[i] == 0)
    else -1
    for i in range(len(DNNPredict))
]
# Number of rows left without a decision.
cnt = result_label.count(-1)
print(cnt)

In [None]:
# Accuracy over the rows that received a decision (result_label != -1).
ck = 0 # number of correct decisions
cnt = 0 # number of rows with a decision
report_list = []
report_pred = []
for i in range(len(result_label)):
    if result_label[i] != -1:
        if result_label[i] == y_test.iloc[i]:
            ck += 1
        cnt += 1
if cnt:
    print("accuracy :",round(ck/cnt * 100,2),"%")
else:
    # Every row was skipped; dividing would raise ZeroDivisionError.
    print("accuracy : n/a (no confident predictions)")

In [None]:
# Feature columns of the held-out rows (timestamp and target removed).
Data = data.iloc[-test_size:].drop(columns=["datetime", "label"])
Data

In [None]:
import matplotlib.pyplot as plt
def checking_labeling(labeling_data, step=1000, out_dir="ML_Result"):
    """Save one price chart per chunk of rows, with the predicted labels marked.

    Each chart plots ``close`` against ``datetime`` for up to ``step``
    consecutive rows and overlays a red dot on rows whose ``result_label`` is 0
    and a blue dot on rows whose ``result_label`` is 1. The chart is written to
    ``<out_dir>/<first datetime> ~ <last datetime>.png`` and then closed, so it
    is not left open for inline display.

    Parameters
    ----------
    labeling_data : DataFrame
        Must contain the columns ``datetime``, ``close`` and ``result_label``.
    step : int
        Number of rows per chart. The final chart holds the remaining rows when
        the length is not a multiple of ``step`` (this used to raise IndexError).
    out_dir : str
        Directory for the PNG files; created if it does not exist.
    """
    import os  # local import: the notebook's import cell does not provide it

    os.makedirs(out_dir, exist_ok=True)
    for start in range(0, len(labeling_data), step):
        # .iloc slicing clips at the end of the frame, so the last chunk may be short.
        chunk = labeling_data.iloc[start:start + step]
        name = str(chunk.iloc[0]["datetime"]) + " ~ " + str(chunk.iloc[-1]["datetime"])

        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(name)
        ax.plot(chunk["datetime"], chunk["close"])
        # One scatter call per label value instead of one per row.
        for label_value, color in ((0, "r"), (1, "b")):
            marked = chunk[chunk["result_label"] == label_value]
            if len(marked):
                ax.scatter(marked["datetime"], marked["close"], color=color)

        fig.savefig(os.path.join(out_dir, name + ".png"))
        # Release the figure; otherwise every chart stays in memory until the cell ends.
        plt.close(fig)

In [None]:
# Put the held-out rows (index reset, old index kept as a column) side by side
# with the predicted labels.
result_data = pd.concat(
    [
        data.iloc[-test_size:].reset_index(),
        pd.DataFrame({"result_label": result_label}),
    ],
    axis=1,
)
result_data

In [None]:
# Chart only the first 10,000 held-out rows.
checking_labeling(result_data.iloc[:10000])

In [None]:
# NOTE(review): this import re-binds `ensembleModel` to the class exported by
# the `Network` module, shadowing the class of the same name defined earlier in
# this notebook -- confirm which implementation is meant to be used from here on.
from Network import ensembleModel
# Same positional arguments as the earlier construction: 20 and the number of
# feature columns in x_train.
model = ensembleModel(20,x_train.shape[1])

In [None]:
# Train the model built from the imported `ensembleModel` on the training split.
model.models_fit(x_train,y_train)