# Code Sample

최근에 작성했던 코드를 첨부했습니다. 

In [None]:
import warnings
import time
import re
import pandas as pd
import numpy as np
import os
import itertools
import multiprocessing
import pickle
import tensorflow as tf
from tensorflow import keras
from functools import partial
from itertools import repeat
from datetime import datetime        
import matplotlib

import matplotlib.pylab as plt

# Set TensorFlow's native (C++) log-level environment variable to '3' to quiet
# its console logging.
# NOTE(review): this runs after `import tensorflow` above; the variable is
# usually expected to be set before the import -- confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Suppress every Python warning for the rest of the process.
warnings.filterwarnings(action='ignore')
#tf.logging.set_verbosity(tf.logging.ERROR)




class ArchiTools():
    """Hold the run settings and the hyper-parameters of one model architecture.

    The hyper-parameters are read from ``<sDirHome>/psc/model_archi.csv``, using
    the first row whose ``sModelName`` column equals ``sModelName``.

    Parameters
    ----------
    sDirHome : project home directory (input and output paths are built from it).
    sMarket : market identifier; prefixes the target column names.
    nAhead : forecast horizon; suffixes the target variable name.
    sModelName : key looked up in the ``sModelName`` column of the table.
    nVerbose : truthy to let the reader methods print progress messages.

    Raises
    ------
    IndexError
        If no row of the architecture table matches ``sModelName``.
    """

    # Column expected to hold the prediction-interval window length.
    # NOTE(review): the previous code read 'sOptimizer' here (copy/paste slip).
    # This name follows the table's naming convention -- confirm it against
    # model_archi.csv and adjust if the real column is called differently.
    PRED_INTERVAL_COLUMN = 'nPredIntervalSteps'

    def __init__(self, sDirHome, sMarket, nAhead, sModelName, nVerbose=1):
        self.dirHome = sDirHome
        self.market = sMarket
        self.ahead = nAhead
        self.modelName = sModelName
        self.verbose = nVerbose

        # Hyper-parameter
        self.archiDF = pd.read_csv(self.dirHome+'/psc/model_archi.csv')
        matches = self.archiDF.index[self.archiDF['sModelName'] == self.modelName]
        if len(matches) == 0:
            # Same exception type as the bare ``.index[0]`` used before, but
            # with a message that names the missing model.
            raise IndexError("model '{}' is not defined in {}/psc/model_archi.csv"
                             .format(self.modelName, self.dirHome))
        archiIdx = matches[0]

        def pick(sColumn):
            # Scalar access keeps each column's own dtype (a row Series would upcast).
            return self.archiDF.loc[archiIdx, sColumn]

        self.selection = pick('sSelection')
        self.totFeature = pick('nTotFeature')
        self.features = pick('nFeatures')

        self.timeUnit = pick('nTimeUnit')
        self.timeSteps = pick('nTimeSteps')
        self.initialState = pick('bInitialState')
        self.stacks = pick('nStacks')

        # Hidden sizes are stored as an underscore-separated string ("64_32").
        # When every row has a single layer pandas parses the column as a
        # number, so only split when the cell really is a string.
        rawNodes = pick('ltHiddenNodes')
        nodeTokens = rawNodes.split('_') if isinstance(rawNodes, str) else [rawNodes]
        self.hiddenNodes = np.array([int(tok) for tok in nodeTokens], dtype=np.int64)

        self.regularFactor = pick('nRegularFactor')
        self.lossName = pick('sLossName')
        self.optimizer = pick('sOptimizer')
        # None (meaning "no window limit") when the table has no such column.
        if self.PRED_INTERVAL_COLUMN in self.archiDF.columns:
            self.predIntervalSteps = pick(self.PRED_INTERVAL_COLUMN)
        else:
            self.predIntervalSteps = None

        # directory
        self.rawDFDir = self.dirHome + "/out/totalDataMart"
        self.convertedDFDir = self.dirHome + "/out/convertedData"
        self.varImpDFDir = self.dirHome + "/out/variableImportance"
        self.trainInfoDir = self.dirHome + '/out/train_info/'

        # Target column names: raw price column and the horizon-specific target.
        self.yName = self.market + '___px_last'
        self.yVar = "{}___px_last_{}".format(self.market, self.ahead)




 



class ReadTools(ArchiTools):
    """Read the project's CSV inputs and select the variables to model.

    Each reader stores its result on the instance (``rawDF``, ``convertedDF``,
    ``varImpDF``); the slicing methods then set ``xVars`` and narrow
    ``convertedDF`` to the date, target and feature columns.
    """

    def __init__(self, sDirHome, sMarket, nAhead, sModelName, nVerbose=1):
        super().__init__(sDirHome, sMarket, nAhead, sModelName, nVerbose)

    def readRawData(self, bX=False): #1.1
        """Load the date and raw target columns of markets.csv into ``rawDF``.

        When ``bX`` is truthy, variables.csv is also loaded into ``rawXDF``.
        """
        if self.verbose: print("({}) [ Read Raw Data ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        if bX:
            self.rawXDF = pd.read_csv(self.rawDFDir + "/variables.csv")
        self.rawDF = pd.read_csv(self.rawDFDir + "/markets.csv", usecols=['date', self.yName])

    def readConvertedData(self): #1.2
        """Load ``<ahead as 3 digits>.csv`` into ``convertedDF``, newest date first."""
        if self.verbose: print("({}) [ Read Converted Data ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        tmpDir = self.convertedDFDir + "/{:03}.csv".format(self.ahead)
        self.convertedDF = (pd.read_csv(tmpDir)
                            .sort_values(by=['date'], axis=0, ascending=False)
                            .reset_index(drop=True))

    def readVarImpData(self): #1.3
        """Load the variable-importance table, score it and keep the top rows.

        Five standardised scores are derived (correlation magnitude, impurity,
        negated SSE, negated matching score, non-zero count). ``self.selection``
        chooses how they are combined into ``score``; the ``totFeature``
        highest-scoring rows are kept in ``varImpDF``.

        Raises
        ------
        KeyError
            If ``self.selection`` is not one of 'SEL01', 'SEL02', 'SEL03'.
        """
        if self.verbose: print("({}) [ Read Variables Importance Data ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        tmpDir = self.varImpDFDir + '/{}/{:03}.csv'.format(self.market, self.ahead)
        keepCols = ['var_name', 'correlation','impurity', 'SSE', 'matching_score', 'non_zero_number', 'rank_total']
        # .copy() so the score columns are added to an independent frame.
        imp = pd.read_csv(tmpDir)[keepCols].copy()

        def zscore(series):
            return (series - series.mean()) / series.std()

        imp['score_1'] = zscore(imp['correlation'].abs())
        imp['score_2'] = zscore(imp['impurity'])
        imp['score_3'] = -zscore(imp['SSE'])             # lower SSE is better
        imp['score_4'] = -zscore(imp['matching_score'])  # sign flipped like SSE
        imp['score_5'] = zscore(imp['non_zero_number'])

        if self.selection == 'SEL01':
            imp['score'] = imp[['score_1','score_2','score_3','score_4','score_5']].mean(1)
        elif self.selection == 'SEL02':
            imp['score'] = imp['score_3']
        elif self.selection == 'SEL03':
            imp['score'] = imp['score_5'] * 0.8 + imp['score_2'] *0.2
        else:
            # Previously this fell through and failed in sort_values with a
            # KeyError about the missing 'score' column.
            raise KeyError("unknown selection code '{}' (expected SEL01, SEL02 or SEL03)"
                           .format(self.selection))

        imp = imp.sort_values('score', ascending=False).iloc[:self.totFeature,]
        self.varImpDF = imp.reset_index(drop=True)

    def slicingData(self, nFirstCol=1, nLastCol=80): #1.4
        """Use a positional range of ``convertedDF`` columns as the features.

        ``nFirstCol``/``nLastCol`` default to the previously hard-coded slice
        ``[1:80]``. The date and target columns are never taken as features:
        selecting the target twice would duplicate it in ``convertedDF``.
        """
        candidates = self.convertedDF.columns[nFirstCol:nLastCol].tolist()
        self.xVars = [col for col in candidates if col not in ('date', self.yVar)]
        self.convertedDF = self.convertedDF[['date', self.yVar] + self.xVars]

    def slicingData0(self): #1.4
        """Use the variables ranked in ``varImpDF`` as the features.

        Does nothing unless ``varChecker`` reports that every ranked variable
        exists in ``convertedDF``. The ranking order is preserved so the feature
        order is reproducible between runs (a ``set`` was used before, whose
        iteration order for strings can change from one process to the next).
        """
        if self.varChecker()==1:
            available = set(self.convertedDF.columns.tolist())
            ranked = dict.fromkeys(self.varImpDF['var_name'].tolist())  # ordered, de-duplicated
            self.xVars = [name for name in ranked if name in available]
            self.convertedDF = self.convertedDF[['date', self.yVar] + self.xVars]

    def naChecker(self): #1.5
        """Return one row per ``convertedDF`` column that contains missing values.

        The result has columns ``market``, ``ahead``, ``na_column`` and ``range``
        ("<first date>~<last date>" among the rows where the column is missing).
        """
        naCounts = self.convertedDF.isna().sum()
        NACols = naCounts.index[(naCounts > 0).to_numpy()]
        rangeNA = []
        for col in NACols:
            # Boolean mask instead of positional row numbers fed to a label
            # lookup, so this also works when the index is not 0..n-1.
            naDates = self.convertedDF.loc[self.convertedDF[col].isna(), 'date']
            rangeNA.append(min(naDates) + '~'  + max(naDates))
        return pd.DataFrame({'market':self.market,
                             'ahead': self.ahead,
                             'na_column':NACols,'range':rangeNA})

    def varChecker(self): #1.6
        """Return 1 if every ``varImpDF`` variable is a ``convertedDF`` column, else 0."""
        ranked = set(self.varImpDF['var_name'])
        nFound = len(ranked.intersection(self.convertedDF.columns))
        if nFound == self.varImpDF.shape[0]:
            return 1
        print('Variables are not enough to be applied to the GRU.\n')
        return 0

    def naRemover(self): #1.7
        """Drop every ``convertedDF`` row with a missing value and report the count."""
        if self.verbose: print("({}) [ Remove Non-Available Data ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        cleaned = self.convertedDF.dropna().reset_index(drop=True)
        print("{:23} * {} rows are removed!".format('',self.convertedDF.shape[0] - cleaned.shape[0]))
        self.convertedDF = cleaned
        
        


class DataTools(ReadTools):
    """Derive the date windows and turn ``convertedDF`` into model-ready arrays.

    All window boundaries are 'YYYY-MM-DD' strings computed backwards from
    ``sTrainEnd`` in business days: training covers 519 business days, the
    validation window ends one business day before training starts and spans
    64 business days, and the min-max window starts ``timeSteps * timeUnit``
    business days before the training start.
    """

    def __init__(self, sDirHome, sMarket, nAhead, sModelName, sTrainEnd, nVerbose=1):
        super().__init__(sDirHome, sMarket, nAhead, sModelName, nVerbose)
        # Date-parameters
        self.trainEnd = sTrainEnd
        self.trainStart = self._shiftBusinessDays(self.trainEnd, -519)
        self.validEnd = self._shiftBusinessDays(self.trainStart, -1)
        self.validStart = self._shiftBusinessDays(self.validEnd, -64)
        self.minmaxStart = self._shiftBusinessDays(self.trainStart, -(self.timeSteps*self.timeUnit))

    @staticmethod
    def _shiftBusinessDays(sDate, nDays):
        """Return the second point of a 2-period business-day range from ``sDate``."""
        return pd.date_range(sDate, periods=2, freq='{}B'.format(nDays)).strftime('%Y-%m-%d')[1]

    def dateMaker(self): #2.1
        """Rename ``date`` to ``date_base`` and add the forecast date ``date_frct``.

        ``date_frct`` is ``date_base`` moved ``ahead`` business days forward.
        Safe to call again: the rename is skipped once ``date_base`` exists
        (a second call used to fail because ``date`` was already renamed).
        """
        if 'date_base' not in self.convertedDF.columns:
            self.convertedDF = self.convertedDF.rename(columns={'date':'date_base'})
        self.convertedDF['date_frct'] = self.convertedDF['date_base'].apply(
            lambda sDate: self._shiftBusinessDays(sDate, self.ahead))
        self.convertedDF = self.convertedDF[['date_base', 'date_frct'] + [self.yVar] + self.xVars]

    def minmaxComputer(self): #2.2
        """Compute each feature's min and max over the min-max window.

        The window is ``minmaxStart <= date_frct <= trainEnd``. The result goes
        to ``minmaxDF``; ``use_ox`` is 0 for features whose range is zero, and
        ``xVarsUse`` lists the remaining features.
        """
        inWindow = self.convertedDF['date_frct'].between(self.minmaxStart, self.trainEnd)
        window = self.convertedDF.loc[inWindow, self.xVars]
        self.minmaxDF = pd.DataFrame({'var_name': self.xVars,
                                      'range':'{} ~ {}'.format(self.minmaxStart, self.trainEnd),
                                      'min': window.min().tolist(),
                                      'max': window.max().tolist()})
        # A zero range would make the normalisation divide by zero.
        self.minmaxDF['use_ox'] = np.where(self.minmaxDF['max']-self.minmaxDF['min']!=0, 1, 0)
        self.xVarsUse = self.minmaxDF['var_name'].loc[self.minmaxDF['use_ox']==1].tolist()

    def minmaxNormalization(self): #2.3
        """Build ``normalizedDF``: dates, target and min-max scaled ``xVarsUse``.

        Values outside the min-max window may fall outside [0, 1].
        """
        # Index the statistics once instead of scanning minmaxDF per column.
        stats = self.minmaxDF.set_index('var_name')
        lower = stats.loc[self.xVarsUse, 'min']
        upper = stats.loc[self.xVarsUse, 'max']
        # Frame-minus-Series aligns the statistics with the columns by name.
        scaled = (self.convertedDF[self.xVarsUse] - lower) / (upper - lower)
        self.normalizedDF = pd.concat([self.convertedDF[['date_base','date_frct']+ [self.yVar]],
                                       scaled], axis=1)

    def toNumpy(self, nUnitInterval, nRepeatedCells, bNormal=False, bYOn=False): #2.4
        """Cut the data into overlapping windows and return them as arrays.

        The chosen frame (``normalizedDF`` if ``bNormal`` else ``convertedDF``)
        is sorted in place by ``date_frct``, newest first. Window ``i`` takes
        the rows ``i, i+nUnitInterval, ...`` (``nRepeatedCells`` of them) and
        stores them oldest first.

        Returns
        -------
        flagDateDF : DataFrame with the largest ``date_base`` / ``date_frct``
            of each window.
        xNP : array of shape (windows, nRepeatedCells, features).
        yNP : array of shape (windows, nRepeatedCells, 1); only when ``bYOn``.
        """
        if self.verbose:  # progress output now follows the flag like the readers do
            print("({}) [ Data Preprocessing ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
            print('{:23} - Reshape Data (converting data frame to numpy.)'.format(''))
        if bNormal:
            self.normalizedDF.sort_values(by=['date_frct'], ascending=False, inplace=True)
            source, xCols = self.normalizedDF, self.xVarsUse
        else:
            self.convertedDF.sort_values(by=['date_frct'], ascending=False, inplace=True)
            source, xCols = self.convertedDF, self.xVars
        dateDF = source[['date_base', 'date_frct']]

        # batch index: one row of positions per window, newest position first
        dimBatch = len(source) - nUnitInterval*(nRepeatedCells-1)
        offsets = np.arange(nRepeatedCells) * nUnitInterval
        windowIdx = np.arange(max(dimBatch, 0))[:, None] + offsets[None, :]
        chronological = windowIdx[:, ::-1]  # oldest row first inside each window

        # to numpy: one fancy-indexing pass instead of a DataFrame slice per window
        windowDates = dateDF.to_numpy()[windowIdx].max(axis=1)
        flagDateDF = pd.DataFrame(windowDates, columns=['date_base', 'date_frct'])
        xNP = source[xCols].to_numpy()[chronological]
        if bYOn:
            yNP = source[[self.yVar]].to_numpy()[chronological]
            return flagDateDF, xNP, yNP
        return flagDateDF, xNP



class ModelTools():
    """Build and compile a stacked-GRU Keras model with a per-step linear output.

    The constructor builds the network (``modelgen``) and compiles it
    (``modelcompile``); the result is available as ``gru_model``.

    Parameters
    ----------
    nTimeSteps, nFeatures : the model input shape is (nTimeSteps, nFeatures).
    bInitialState : when truthy, an extra GRU reads the input and its final
        state seeds the first GRU cell.
    nStacks : number of stacked GRU cells.
    ltHiddenNodes : units of each GRU cell; needs at least ``nStacks`` entries.
    nRegularFactor : L2 factor on the output layer kernel.
    sLossName : one of 'LOSS01' .. 'LOSS05'.
    sOptimizer : 'Adam' or 'RMSprop'.

    Raises
    ------
    ValueError
        If ``sLossName`` or ``sOptimizer`` is not one of the supported names.
    """

    # (cumulative-error weight, point-error weight) of the two blended losses.
    _BLEND_WEIGHTS = {'LOSS03': (0.3, 0.7), 'LOSS04': (0.2, 0.8)}

    def __init__(self, nTimeSteps, nFeatures, bInitialState, nStacks, ltHiddenNodes, nRegularFactor, sLossName, sOptimizer):
        self.nTimeSteps = nTimeSteps
        self.nFeatures = nFeatures
        self.bInitialState = bInitialState
        self.nStacks = nStacks
        self.ltHiddenNodes = ltHiddenNodes
        self.nRegularFactor = nRegularFactor
        self.sLossName = sLossName
        self.sOptimizer = sOptimizer
        self.modelgen()
        self.modelcompile()

    def modelgen(self):
        """Assemble the network and store it in ``self.gru_model``."""
        layer_input = keras.Input(shape=(self.nTimeSteps, self.nFeatures), name='input')

        # Extract Initial State?
        if self.bInitialState:
            # return_state=True yields [output, state]; only the state is kept.
            layer_state = keras.layers.GRU(units=self.ltHiddenNodes[0], return_state=True, name='gru_initial_state')(layer_input)[1]
            layer_gru = keras.layers.GRU(units=self.ltHiddenNodes[0], return_sequences=True, name='gru_cell_0')(layer_input, initial_state=layer_state)
        else:
            layer_gru = keras.layers.GRU(units=self.ltHiddenNodes[0], return_sequences=True, name='gru_cell_0')(layer_input)

        # Remaining GRU cells of the stack (none when nStacks is 1).
        for depth in range(1, self.nStacks):
            layer_gru = keras.layers.GRU(units=self.ltHiddenNodes[depth], return_sequences=True, name='gru_cell_{}'.format(depth))(layer_gru)

        # Output layer: one linear unit applied to every time step.
        layer_output = keras.layers.TimeDistributed(keras.layers.Dense(units=1, activation='linear',
            kernel_regularizer=keras.regularizers.l2(l=self.nRegularFactor),
            kernel_initializer=tf.keras.initializers.Orthogonal(seed=925)), name='output')(layer_gru)

        # Deploy Model
        self.gru_model = keras.Model(layer_input, layer_output)

    @staticmethod
    def _rmse(y_true, y_pred):
        """Root of the mean squared difference."""
        return tf.math.sqrt(tf.math.reduce_mean(tf.math.square(y_true-y_pred)))

    @staticmethod
    def _signProduct(y_true, y_pred):
        """Element-wise product of the signs (+1 agree, -1 disagree, 0 if either is 0)."""
        return tf.math.multiply(tf.math.sign(y_true), tf.math.sign(y_pred))

    @staticmethod
    def _corrPenalty(y_true, y_pred):
        """Return 1 - correlation, treating a NaN correlation as 0."""
        mean_true = tf.math.reduce_mean(y_true)
        mean_pred = tf.math.reduce_mean(y_pred)
        up = tf.reduce_mean(tf.math.multiply((y_true-mean_true), (y_pred-mean_pred)))
        down = tf.math.reduce_std(y_true) * tf.math.reduce_std(y_pred)
        return 1.0-tf.compat.v1.where(tf.math.is_nan(up/down),0.0,up/down)

    def _makeLoss(self):
        """Return the loss function selected by ``self.sLossName``."""
        rmse = self._rmse
        signProduct = self._signProduct
        corrPenalty = self._corrPenalty

        if self.sLossName == 'LOSS01':
            def customLoss(y_true, y_pred):
                # RMSE + |difference of means| + sign disagreement.
                mean_true = tf.math.reduce_mean(y_true)
                mean_pred = tf.math.reduce_mean(y_pred)
                meanerr = tf.math.sqrt(tf.math.square(mean_true - mean_pred))
                signerr = 1.0-tf.math.reduce_mean(signProduct(y_true, y_pred))
                return rmse(y_true, y_pred) + meanerr + signerr
        elif self.sLossName == 'LOSS02':
            def customLoss(y_true, y_pred):
                # RMSE + half the correlation penalty + half the sign disagreement.
                signerr = 1-tf.math.reduce_mean(signProduct(y_true, y_pred))
                return rmse(y_true, y_pred) + corrPenalty(y_true, y_pred)/2 + signerr/2
        elif self.sLossName in self._BLEND_WEIGHTS:
            cumWeight, pointWeight = self._BLEND_WEIGHTS[self.sLossName]
            def customLoss(y_true, y_pred):
                # Blend of cumulative-sum errors and point-wise errors.
                cumsum_true = tf.math.cumsum(y_true)
                cumsum_pred = tf.math.cumsum(y_pred)
                cumsum_mean_true = tf.math.reduce_mean(cumsum_true)
                cumsum_mean_pred = tf.math.reduce_mean(cumsum_pred)

                cumsumerr = tf.math.sqrt(tf.math.reduce_mean(tf.math.square(cumsum_true-cumsum_pred)))
                cummeanerr = tf.math.sqrt(tf.math.reduce_mean(tf.math.square(cumsum_mean_true-cumsum_mean_pred)))

                # Sign term rescaled from [-1, 1] to [0, 1] before averaging.
                signerr = 1-tf.math.reduce_mean((signProduct(y_true, y_pred)+1.0)/2.0)
                corr = corrPenalty(y_true, y_pred)

                return (cumsumerr+cummeanerr)*cumWeight+(rmse(y_true, y_pred)+signerr+corr/2.0)*pointWeight
        elif self.sLossName == 'LOSS05':
            def customLoss(y_true, y_pred):
                # Plain mean squared error.
                return tf.math.reduce_mean(tf.math.square(y_true-y_pred))
        else:
            # Used to surface as an UnboundLocalError inside modelcompile.
            raise ValueError("unknown loss name '{}' (expected LOSS01 .. LOSS05)".format(self.sLossName))
        return customLoss

    def modelcompile(self):
        """Compile ``gru_model`` with the configured loss and optimizer.

        Both optimizers are created with their default arguments.
        """
        customLoss = self._makeLoss()

        # Optimizer
        optimizers = {'Adam': keras.optimizers.Adam,
                      'RMSprop': keras.optimizers.RMSprop}
        if self.sOptimizer not in optimizers:
            # An unknown name used to leave the model silently uncompiled.
            raise ValueError("unknown optimizer '{}' (expected Adam or RMSprop)".format(self.sOptimizer))
        self.gru_model.compile(loss=customLoss, optimizer=optimizers[self.sOptimizer]())






########################################
# 1. Get Data.
# 2. Train GRU Model.
# 3. Save model weights.
########################################


class TrainTools(DataTools):
    """Train an ensemble of GRU models and persist what forecasting needs.

    Intended call order: ``getData`` -> ``preprocData`` -> ``splitData`` ->
    ``training`` (or ``trainingN``) -> ``saveTrainInfo``.
    """

    def __init__(self, sDirHome, sMarket, nAhead, sModelName, sTrainEnd, nVerbose=1):
        super().__init__(sDirHome, sMarket, nAhead, sModelName, sTrainEnd, nVerbose)

    def getData(self):
        """Read every input file, select the features and drop incomplete rows."""
        self.readConvertedData()
        self.readVarImpData()
        self.readRawData()
        self.slicingData()
        self.naRemover()

    def preprocData(self):
        """Add forecast dates, min-max normalise and build the windowed arrays."""
        self.dateMaker()
        self.minmaxComputer()
        self.minmaxNormalization()
        self.dateDF, self.xNP, self.yNP = self.toNumpy(self.timeUnit, self.timeSteps, bNormal=True, bYOn=True)
        print(self.dateDF.shape, self.xNP.shape, self.yNP.shape)

    def splitData(self):
        """Split the windows into training and validation sets by ``date_frct``."""
        frctDates = self.dateDF['date_frct']
        trainMask = ((frctDates>=self.trainStart)&(frctDates<=self.trainEnd)).to_numpy()
        validMask = ((frctDates>=self.validStart)&(frctDates<=self.validEnd)).to_numpy()
        self.trainXNP = self.xNP[trainMask,:,:]
        self.trainYNP = self.yNP[trainMask,:,:]
        self.validXNP = self.xNP[validMask,:,:]
        self.validYNP = self.yNP[validMask,:,:]

    def _newModel(self):
        """Create a freshly initialised model from the architecture settings."""
        return ModelTools(self.timeSteps,
                          self.features,
                          self.initialState,
                          self.stacks,
                          self.hiddenNodes,
                          self.regularFactor,
                          self.lossName,
                          self.optimizer)

    def _drawColumns(self):
        """Draw ``features`` distinct column numbers out of ``totFeature``."""
        return np.random.choice(self.totFeature, self.features, replace=False)

    def _fitUntilFinite(self, nModel, nEpochs, earlyStop, bResampleColumns):
        """Fit model ``nModel``; restart from scratch while it predicts NaN.

        Returns the last-time-step training predictions of the accepted model.
        NOTE(review): as before, the number of restarts is not bounded.
        """
        while True:
            cols = self.colNumLT[nModel]
            trainX = self.trainXNP[:,:,cols]
            self.MODELTOOLS[nModel].gru_model.fit(trainX, self.trainYNP,
                                                  validation_data = (self.validXNP[:,:,cols], self.validYNP),
                                                  batch_size=20,
                                                  epochs=nEpochs,
                                                  callbacks=[earlyStop],
                                                  verbose=0)
            lastStep = self.MODELTOOLS[nModel].gru_model.predict(trainX)[:,-1,-1]
            if np.isnan(lastStep).sum()==0:
                return lastStep
            # Diverged: rebuild the model (and optionally redraw its columns).
            self.MODELTOOLS[nModel] = self._newModel()
            if bResampleColumns:
                # The original wrote to a misspelled attribute (colNumLTT) here
                # and raised AttributeError the first time a retry was needed.
                self.colNumLT[nModel] = self._drawColumns()

    def _trainEnsemble(self, nModels, nEpochs, bResampleColumns):
        """Fit ``nModels`` models, collecting ``trErr`` and ``avgWeights``."""
        self.MODELTOOLS = [self._newModel() for _ in range(nModels)]
        earlyStop = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=30)

        self.trErr = []
        for x in range(nModels):
            print('{:23}   : Model {:02}'.format('', x))
            lastStep = self._fitUntilFinite(x, nEpochs, earlyStop, bResampleColumns)
            # Sum of squared errors on the last time step of every training window.
            self.trErr.append(((lastStep-self.trainYNP[:,-1,-1])**2).sum())

        # Ensemble weights: (1 - SSE), normalised to sum to one.
        # NOTE(review): a model whose SSE exceeds 1 gets a negative (1 - SSE) --
        # confirm this weighting is intended.
        scores = 1-np.array(self.trErr)
        self.avgWeights = scores/scores.sum()

        print('{:23}  - complete!'.format(''))

    def training(self, nEpochs=200):
        """Train 10 models, each on its own random subset of feature columns.

        Sets ``colNumLT`` (column numbers per model), ``MODELTOOLS``, ``trErr``
        and ``avgWeights``.
        """
        print("({}) [ Train GRU Models ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        nModels = 10
        # select columns
        self.colNumLT = [self._drawColumns() for _ in range(nModels)]
        self._trainEnsemble(nModels, nEpochs, bResampleColumns=True)

    def saveTrainInfo(self):
        """Write each model's weights, ensemble weight and column numbers to disk."""
        print("({}) [ Save Training Information ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        # NOTE(review): trainInfoDir already ends in '/out/train_info/', so this
        # path repeats that segment. It is kept as is because the forecasting
        # side builds the identical path; change both together or not at all.
        trainInfoDir = self.trainInfoDir + '/out/train_info/{}/{}'.format(self.trainEnd, self.modelName)

        modelWeightsDir = trainInfoDir + '/model_weights/{}/{:03}'.format(self.market, self.ahead)
        avgWeightsDir = trainInfoDir + '/avg_weights/{}/{:03}'.format(self.market, self.ahead)
        colNumDir = trainInfoDir + '/columns_number/{}/{:03}'.format(self.market, self.ahead)

        for outDir in (modelWeightsDir, avgWeightsDir, colNumDir):
            os.makedirs(outDir, exist_ok=True)

        # One file set per trained model (10 after training()).
        for x, model in enumerate(self.MODELTOOLS):
            model.gru_model.save_weights(modelWeightsDir+'/model_{:02}.h5'.format(x))
            with open(avgWeightsDir+'/model_{:02}.pkl'.format(x), 'wb') as f:
                pickle.dump(self.avgWeights[x], f, pickle.HIGHEST_PROTOCOL)
            with open(colNumDir+'/model_{:02}.pkl'.format(x), 'wb') as f:
                pickle.dump(self.colNumLT[x], f, pickle.HIGHEST_PROTOCOL)

    def trainingN(self, nNumTrain, nEpochs=200):
        """Train ``nNumTrain`` models that all receive every feature column.

        Variant of ``training`` without the random column subsets.
        ``colNumLT`` is set to "all columns" for every model so the methods that
        index by it (``saveTrainInfo``, ``predValidation``) still work.
        NOTE(review): the models are still built with ``self.features`` inputs,
        so this requires ``self.features`` to equal the number of columns in
        ``trainXNP`` -- confirm against the architecture table.
        """
        print("({}) [ Train GRU Models ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        allColumns = np.arange(self.trainXNP.shape[2])
        self.colNumLT = [allColumns.copy() for _ in range(nNumTrain)]
        self._trainEnsemble(nNumTrain, nEpochs, bResampleColumns=False)

    def predValidation(self):
        """Store the ensemble prediction for the validation windows in ``prediction``.

        Each model predicts twice, once on inputs clipped to [0, 1] and once on
        the unclipped inputs; the weighted sums are blended 0.8 / 0.2.
        """
        print("({}) [ predValidation ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        clippedSum = 0.0
        rawSum = 0.0
        for x, model in enumerate(self.MODELTOOLS):
            # The original read a nonexistent attribute (validNP) and took the
            # unclipped prediction from all windows (xNP), which cannot be added
            # to a validation-sized vector; both now use the validation inputs.
            rawInputs = self.validXNP[:,:,self.colNumLT[x]]
            inputs = np.clip(rawInputs, 0, 1)
            clippedSum = clippedSum + model.gru_model.predict(inputs)[:,-1,-1] * self.avgWeights[x]
            rawSum = rawSum + model.gru_model.predict(rawInputs)[:,-1,-1] * self.avgWeights[x]

        self.prediction = clippedSum*0.8 + rawSum*0.2








class ForecastTools(DataTools):
    """Load a trained ensemble and produce a forecast for the newest window.

    Intended call order: ``getTestData`` -> ``preprocTestData`` ->
    ``readTrainInfo`` -> ``forecasting`` -> ``makeOutput`` ->
    ``saveForecastInfo``.

    Parameters
    ----------
    sTestStart, sTestEnd : stored as ``testStart`` / ``testEnd``; ``testEnd``
        also names the output folder when no ``sWorkingDate`` attribute is set.
    The remaining parameters are passed to ``DataTools``.
    """

    def __init__(self, sDirHome, sMarket, nAhead, sModelName, sTrainEnd, sTestStart, sTestEnd, nVerbose=1):
        super().__init__(sDirHome, sMarket, nAhead, sModelName, sTrainEnd, nVerbose)
        print('Setup')
        # Generate Models (weights are loaded later by readTrainInfo)
        self.MODELTOOLS = [ModelTools(self.timeSteps,
                                      self.features,
                                      self.initialState,
                                      self.stacks,
                                      self.hiddenNodes,
                                      self.regularFactor,
                                      self.lossName,
                                      self.optimizer) for x in range(10)]

        # Date Parameters
        self.testStart = sTestStart
        self.testEnd = sTestEnd

    def getTestData(self):
        """Read the input files and select the feature columns.

        The readers take no date or size arguments; the arguments passed before
        made the first two calls raise TypeError. Rows with missing values are
        not dropped here.
        """
        self.readConvertedData()
        self.readVarImpData()
        self.readRawData()
        self.slicingData()

    def preprocTestData(self):
        """Normalise the data and keep only the newest window.

        Sets ``dateDF`` (one row) and ``xNP`` (one window).
        """
        # minmaxComputer and toNumpy need 'date_frct', which dateMaker creates.
        if 'date_frct' not in self.convertedDF.columns:
            self.dateMaker()
        self.minmaxComputer()
        self.minmaxNormalization()
        dateDF, xNP = self.toNumpy(self.timeUnit, self.timeSteps, bNormal=True, bYOn=False)
        # Target data: windows are ordered newest first, so take the first one.
        self.dateDF = dateDF.iloc[0:1]
        self.xNP = xNP[0:1, :, :]

    def readTrainInfo(self):
        """Load each model's weights, ensemble weight and column numbers.

        NOTE(review): pickle.load executes whatever the file contains; only
        point this at files written by the training step.
        """
        print("({}) [ Read Training Information ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        # NOTE(review): trainInfoDir already ends in '/out/train_info/', so this
        # path repeats that segment. It is kept as is because the training side
        # writes to the identical path; change both together or not at all.
        trainInfoDir = self.trainInfoDir + '/out/train_info/{}/{}'.format(self.trainEnd, self.modelName)

        modelWeightsDir = trainInfoDir + '/model_weights/{}/{:03}'.format(self.market, self.ahead)
        avgWeightsDir = trainInfoDir + '/avg_weights/{}/{:03}'.format(self.market, self.ahead)
        # Was self.markets (AttributeError); the attribute is self.market.
        colNumDir = trainInfoDir + '/columns_number/{}/{:03}'.format(self.market, self.ahead)

        self.avgWeights = []
        self.colNumLT = []
        for x, model in enumerate(self.MODELTOOLS):
            model.gru_model.load_weights(modelWeightsDir+'/model_{:02}.h5'.format(x))
            with open(avgWeightsDir+'/model_{:02}.pkl'.format(x), 'rb') as f:
                self.avgWeights.append(pickle.load(f))
            with open(colNumDir+'/model_{:02}.pkl'.format(x), 'rb') as f:
                self.colNumLT.append(pickle.load(f))

    def forecasting(self):
        """Store the ensemble forecast for ``xNP`` in ``prediction``.

        Each model predicts twice, once on inputs clipped to [0, 1] and once on
        the unclipped inputs; the weighted sums are blended 0.8 / 0.2.
        """
        print("({}) [ Forecasting '{}' {} Ahead ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), self.market, self.ahead))
        clippedSum = 0.0
        rawSum = 0.0
        for x, model in enumerate(self.MODELTOOLS):
            rawInputs = self.xNP[:,:,self.colNumLT[x]]
            inputs = np.clip(rawInputs, 0, 1)
            clippedSum = clippedSum + model.gru_model.predict(inputs)[:,-1,-1] * self.avgWeights[x]
            rawSum = rawSum + model.gru_model.predict(rawInputs)[:,-1,-1] * self.avgWeights[x]

        self.prediction = clippedSum*0.8 + rawSum*0.2

    def makeOutput(self):
        """Build the one-row result table ``outputDF``.

        The interval half-width is 1.65 times the standard deviation of the
        newest ``predIntervalSteps`` non-missing target values (all of them
        when ``predIntervalSteps`` is not a usable integer).
        """
        # The date column is 'date' before dateMaker has run and 'date_base' after.
        dateCol = 'date' if 'date' in self.convertedDF.columns else 'date_base'
        history = (self.convertedDF[[dateCol, self.yVar]]
                   .sort_values(dateCol, ascending=False)
                   .dropna()
                   .reset_index(drop=True))
        try:
            nSteps = int(self.predIntervalSteps)
        except (TypeError, ValueError):
            # The architecture table may not supply a numeric window length.
            nSteps = None
        movingStd = history[self.yVar].iloc[:nSteps].std()

        # Last raw value, selected by column name rather than by position.
        baseValue = self.rawDF[self.yName].iloc[-1]
        forecast = self.prediction[0]
        halfWidth = movingStd*1.65

        self.outputDF = pd.DataFrame({'BSE_DT':[self.dateDF.iloc[0,0]],
                                      'CST_TGT_PD_CD':[self.market],
                                      'CST_MD_GB_CD':['A1'],
                                      'CST_DT_GB_CD':[self.ahead],
                                      'CST_DT':[self.dateDF.iloc[0,1]],
                                      'IDX_BSE_VAL':[baseValue],
                                      'CST_IDX_BSE_VAL':[baseValue*(1+forecast)],
                                      'CST_IDX_MIN_VAL':[baseValue*(1+forecast-halfWidth)],
                                      'CST_IDX_MAX_VAL':[baseValue*(1+forecast+halfWidth)],
                                      'CST_IDX_BSE_RT':[forecast],
                                      'CST_IDX_MIN_RT':[forecast-halfWidth],
                                      'CST_IDX_MAX_RT':[forecast+halfWidth]})

    def saveForecastInfo(self):
        """Write ``outputDF`` to ``<dirHome>/out/forecast_info/<date>/output``."""
        print("({}) [ Save Forecast Information ]".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        # The home directory attribute is dirHome (homeDir never existed).
        # NOTE(review): sWorkingDate is not set anywhere in this class, so the
        # test end date is used unless a caller assigns it -- confirm the
        # intended folder name.
        workingDate = getattr(self, 'sWorkingDate', self.testEnd)
        frctInfoDir = self.dirHome + '/out/forecast_info/{}'.format(workingDate)
        outputDir = frctInfoDir + '/output'

        os.makedirs(outputDir, exist_ok=True)

        self.outputDF.to_csv(outputDir+'/{}_{:03}.csv'.format(self.market, self.ahead), index=False)