# Define the model
## Define the RNN, LSTM and GRU networks

In [None]:
# -*- coding: UTF-8 -*-


import torch
from torch.nn import Module, LSTM, Linear, RNN, GRU
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class Net(Module):
    """Recurrent regressor: a stacked LSTM/RNN/GRU followed by a linear head.

    The recurrent layer type is selected by ``config.model_type``. All three
    variants share the same hyper-parameters, which is why the layer count is
    always read from ``config.lstm_layers``.

    Args:
        config: object exposing ``model_type``, ``input_size``,
            ``hidden_size``, ``lstm_layers``, ``dropout_rate`` and
            ``output_size``.

    Raises:
        ValueError: if ``config.model_type`` is not 'LSTM', 'RNN' or 'GRU'.
    """

    # Supported values of ``config.model_type`` and the layer class they select.
    _RECURRENT_LAYERS = {'LSTM': LSTM, 'RNN': RNN, 'GRU': GRU}

    def __init__(self, config):
        super(Net, self).__init__()
        try:
            layer_cls = self._RECURRENT_LAYERS[config.model_type]
        except KeyError:
            # Fail at construction time instead of leaving ``self.rnn`` undefined
            # and crashing later inside ``forward``.
            raise ValueError(
                "Unsupported model_type {!r}; expected one of {}".format(
                    config.model_type, sorted(self._RECURRENT_LAYERS))
            ) from None

        # batch_first=True makes the first dimension of the input the batch.
        self.rnn = layer_cls(input_size=config.input_size, hidden_size=config.hidden_size,
                             num_layers=config.lstm_layers, batch_first=True,
                             dropout=config.dropout_rate)
        self.linear = Linear(in_features=config.hidden_size, out_features=config.output_size)

    def forward(self, x, hidden=None):
        """Run the recurrent layer and project every time step through the linear head.

        Args:
            x: input batch passed to the recurrent layer (batch first).
            hidden: optional initial hidden state; ``None`` lets the layer
                start from its default state.

        Returns:
            A ``(linear_out, hidden)`` pair: the per-time-step output of the
            linear head and the final hidden state returned by the recurrent layer.
        """
        net_out, hidden = self.rnn(x, hidden)
        linear_out = self.linear(net_out)
        return linear_out, hidden


def _detach_hidden(hidden):
    """Return ``hidden`` cut off from the autograd graph.

    ``LSTM`` returns an ``(h_n, c_n)`` tuple while ``RNN``/``GRU`` return a
    single tensor, so both layouts are handled.
    """
    if isinstance(hidden, tuple):
        return tuple(state.detach() for state in hidden)
    return hidden.detach()


def train(config, logger, train_and_valid_data):
    """Train a ``Net`` with early stopping and save the best weights to disk.

    Args:
        config: run configuration; reads the model, optimizer, batch size,
            epoch, patience, device and path settings.
        logger: logger used for the per-epoch progress messages.
        train_and_valid_data: sequence ``(train_X, train_Y, valid_X, valid_Y)``
            of NumPy arrays.

    Side effects:
        Writes the state dict of the best model (lowest validation loss) to
        ``config.model_save_path + config.model_name`` and shows a plot of the
        training loss sampled every 500 steps.
    """
    if config.do_train_visualized:
        import visdom
        vis = visdom.Visdom(env='model_pytorch')

    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
    # Convert to tensors first; DataLoader then produces the mini-batches.
    train_X, train_Y = torch.from_numpy(train_X).float(), torch.from_numpy(train_Y).float()
    train_loader = DataLoader(TensorDataset(train_X, train_Y), batch_size=config.batch_size)

    valid_X, valid_Y = torch.from_numpy(valid_X).float(), torch.from_numpy(valid_Y).float()
    valid_loader = DataLoader(TensorDataset(valid_X, valid_Y), batch_size=config.batch_size)

    device = torch.device("cuda:0" if config.use_cuda and torch.cuda.is_available() else "cpu")
    model = Net(config).to(device)      # .to(device) copies the parameters to the GPU when one is used
    if config.add_train:
        # Incremental training: start from the previously saved parameters.
        # map_location lets a checkpoint saved on another device be loaded here.
        model.load_state_dict(torch.load(config.model_save_path + config.model_name,
                                         map_location=device))

    if config.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = torch.nn.MSELoss()

    valid_loss_min = float("inf")
    bad_epoch = 0
    global_step = 0
    losses = []                         # training loss sampled every 500 steps, plotted at the end

    for epoch in range(config.epoch):
        logger.info("Epoch {}/{}".format(epoch, config.epoch))
        model.train()
        train_loss_array = []
        hidden_train = None
        for _data in train_loader:
            _train_X, _train_Y = _data[0].to(device), _data[1].to(device)
            optimizer.zero_grad()
            pred_Y, hidden_train = model(_train_X, hidden_train)    # forward pass

            if not config.do_continue_train:
                hidden_train = None
            else:
                # Carry the state to the next batch but drop its gradient
                # history; works for the LSTM tuple and the RNN/GRU tensor.
                hidden_train = _detach_hidden(hidden_train)

            loss = criterion(pred_Y, _train_Y)
            loss_value = loss.item()
            if global_step % 500 == 0:
                losses.append(loss_value)
            loss.backward()
            optimizer.step()
            train_loss_array.append(loss_value)
            global_step += 1
            if config.do_train_visualized and global_step % 100 == 0:   # shown every hundred steps
                vis.line(X=np.array([global_step]), Y=np.array([loss_value]), win='Train_Loss',
                         update='append' if global_step > 0 else None, name='Train', opts=dict(showlegend=True))

        # Early stopping: when the validation loss has not improved for
        # config.patience consecutive epochs, training stops to limit overfitting.
        model.eval()
        valid_loss_array = []
        hidden_valid = None
        with torch.no_grad():           # validation is forward-only, so no graph is needed
            for _valid_X, _valid_Y in valid_loader:
                _valid_X, _valid_Y = _valid_X.to(device), _valid_Y.to(device)
                pred_Y, hidden_valid = model(_valid_X, hidden_valid)
                if not config.do_continue_train:
                    hidden_valid = None
                loss = criterion(pred_Y, _valid_Y)
                valid_loss_array.append(loss.item())

        train_loss_cur = np.mean(train_loss_array)
        valid_loss_cur = np.mean(valid_loss_array)
        logger.info("The train loss is {:.6f}. ".format(train_loss_cur) +
              "The valid loss is {:.6f}.".format(valid_loss_cur))
        if config.do_train_visualized:
            vis.line(X=np.array([epoch]), Y=np.array([train_loss_cur]), win='Epoch_Loss',
                     update='append' if epoch > 0 else None, name='Train', opts=dict(showlegend=True))
            vis.line(X=np.array([epoch]), Y=np.array([valid_loss_cur]), win='Epoch_Loss',
                     update='append' if epoch > 0 else None, name='Eval', opts=dict(showlegend=True))

        if valid_loss_cur < valid_loss_min:
            valid_loss_min = valid_loss_cur
            bad_epoch = 0
            torch.save(model.state_dict(), config.model_save_path + config.model_name)  # keep the best model
        else:
            bad_epoch += 1
            if bad_epoch >= config.patience:
                logger.info(" The training stops early in epoch {}".format(epoch))
                break

    # Imported here so this function does not depend on a later notebook cell
    # having been executed first.
    import matplotlib.pyplot as plt
    plt.plot(losses,label = 'loss')
    plt.legend(loc=0)
    plt.show()
# Predict on the test data
def predict(config, test_X):
    """Run the saved model over ``test_X`` and return its predictions.

    Args:
        config: run configuration; reads the device and model path settings
            and everything ``Net`` needs to rebuild the architecture.
        test_X: NumPy array of test samples; it is fed to the model one
            sample at a time (batch size 1).

    Returns:
        A NumPy array with the per-sample predictions concatenated along the
        first axis (an empty array when ``test_X`` holds no samples).
    """
    test_X = torch.from_numpy(test_X).float()
    test_loader = DataLoader(TensorDataset(test_X), batch_size=1)

    # Rebuild the network and load the saved parameters; map_location lets a
    # checkpoint saved on another device be loaded on the current one.
    device = torch.device("cuda:0" if config.use_cuda and torch.cuda.is_available() else "cpu")
    model = Net(config).to(device)
    model.load_state_dict(torch.load(config.model_save_path + config.model_name,
                                     map_location=device))
    model.eval()

    predictions = []
    hidden_predict = None
    # Inference only: without no_grad the carried hidden state would chain the
    # autograd graph across the whole test set.
    with torch.no_grad():
        for _data in test_loader:
            data_X = _data[0].to(device)
            # The experiment found that, whether or not continuous training mode
            # is used, passing the previous hidden state into the next sample
            # gives better results.
            pred_X, hidden_predict = model(data_X, hidden_predict)
            predictions.append(torch.squeeze(pred_X, dim=0))

    if not predictions:
        return torch.Tensor().numpy()
    # Concatenate once at the end instead of growing a tensor inside the loop.
    return torch.cat(predictions, dim=0).cpu().numpy()


# Settings: configuration and data preparation

In [None]:
# -*- coding: UTF-8 -*-
"""

"""

import pandas as pd
import numpy as np
import os
import sys
import time
import logging
from logging.handlers import RotatingFileHandler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


class Config:
    """Holds every setting of one experiment and prepares its output folders.

    Constructing an instance creates the checkpoint and figure directories
    and, when file logging or training visualisation is requested, the log
    directory that ``log_save_path`` points to.
    """

    def __init__(self):
        # ---- data columns ----
        self.feature_columns = [1, 2, 3, 4]       # choose features: midpoint, spread, buys, sells
        self.label_columns = [1]                  # label: midpoint
        # Position of every label column inside the list of feature columns.
        self.label_in_feature_index = [self.feature_columns.index(col) for col in self.label_columns]
        self.model_type = 'LSTM'         # LSTM or RNN or GRU
        # TODO
        self.predict_day = 5             # number of future moments to predict
        self.optimizer = 'SGD'           # SGD or Adam

        # ---- network parameters ----
        self.input_size = len(self.feature_columns)
        self.output_size = len(self.label_columns)

        self.hidden_size = 128
        self.lstm_layers = 2             # layer count, shared by all three model types
        self.dropout_rate = 0.2
        # Number of previous rows used for one prediction, which is also the
        # number of time steps of the recurrent network. The amount of training
        # data must be larger than this value.
        self.time_step = 60

        # ---- training parameters ----
        self.do_train = True
        self.do_predict = True                  # whether to predict next moments
        self.add_train = False                  # whether to load existing model parameters for incremental training
        self.shuffle_train_data = True          # whether to shuffle train data
        self.use_cuda = False
        self.train_data_rate = 0.7              # train dataset: 70% of data
        self.valid_data_rate = 0.1              # valid dataset: 10% of data
        self.batch_size = 64
        self.learning_rate = 3e-4   #0.001
        self.epoch = 20                         # epochs of training
        self.patience = 5                       # stop if the validation set is not improved in 5 epochs
        self.random_seed = 42

        # Continuous training mode carries the hidden state across batches,
        # so it needs ordered data and a batch size of one.
        self.do_continue_train = False
        self.continue_flag = ""
        if self.do_continue_train:
            self.shuffle_train_data = False
            self.batch_size = 1
            self.continue_flag = "continue_"

        # ---- debug mode: only the first debug_num rows are read ----
        self.debug_mode = False
        self.debug_num = 500

        # ---- framework: pytorch ----
        self.used_frame = 'pytorch'
        model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
        self.model_name = "model_" + self.continue_flag + self.used_frame + model_postfix[self.used_frame]

        # ---- paths ----
        # TODO
        self.train_data_path = "./data/btc_data.csv"
        self.model_save_path = "./checkpoint/" + self.used_frame + "/"
        self.figure_save_path = "./figure/"
        self.log_save_path = "./log/"
        self.do_log_print_to_screen = True
        self.do_log_save_to_file = True
        # TODO
        self.do_figure_save = True
        self.do_train_visualized = False

        # exist_ok avoids a race between the existence check and the creation.
        os.makedirs(self.model_save_path, exist_ok=True)
        os.makedirs(self.figure_save_path, exist_ok=True)
        if self.do_train and (self.do_log_save_to_file or self.do_train_visualized):
            # Each training run gets its own timestamped log directory; the
            # attribute is updated so the log file is really written there.
            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
            self.log_save_path = self.log_save_path + cur_time + '_' + self.used_frame + "/"
            os.makedirs(self.log_save_path, exist_ok=True)
        elif self.do_log_save_to_file:
            # The log file still needs its directory when no training is done.
            os.makedirs(self.log_save_path, exist_ok=True)


class Data:
    """Loads the CSV data, normalises it and cuts it into model samples.

    Args:
        config: run configuration; reads the data path, the feature and label
            column settings, ``train_data_rate``, ``time_step`` and the
            debug / continuous-training switches.
    """

    def __init__(self, config):
        self.config = config
        self.data, self.data_column_name = self.read_data()

        self.data_num = self.data.shape[0]
        self.train_num = int(self.data_num * self.config.train_data_rate)

        # Column statistics are computed over all loaded rows.
        self.mean = np.mean(self.data, axis=0)
        column_std = np.std(self.data, axis=0)              # standard deviation of every column
        # A constant column has zero deviation; dividing by it would turn the
        # whole column into NaN, so such columns are scaled by 1 instead.
        self.std = np.where(column_std == 0, 1.0, column_std)
        self.norm_data = (self.data - self.mean) / self.std

        self.start_num_in_test = 0      # rows skipped at the start of the test set, set by get_test_data

    def read_data(self):
        """Read the feature columns from the CSV file.

        In debug mode only the first ``config.debug_num`` rows are read.

        Returns:
            A ``(values, column_names)`` pair.
        """
        row_limit = self.config.debug_num if self.config.debug_mode else None
        init_data = pd.read_csv(self.config.train_data_path, nrows=row_limit,
                                usecols=self.config.feature_columns)
        return init_data.values, init_data.columns.tolist()     # .columns.tolist() gives the column names

    def get_train_and_valid_data(self):
        """Build the training windows and split off the validation part.

        Labels are the label columns shifted ``config.predict_day`` rows into
        the future relative to the features.

        Returns:
            ``(train_x, valid_x, train_y, valid_y)`` as NumPy arrays.
        """
        time_step = self.config.time_step
        feature_data = self.norm_data[:self.train_num]
        label_data = self.norm_data[self.config.predict_day : self.config.predict_day + self.train_num,
                                    self.config.label_in_feature_index]

        if not self.config.do_continue_train:
            # Sliding windows: one sample starts at every row.
            window_starts = range(self.train_num - time_step)
        else:
            # In continuous training mode each sample covers time_step rows and
            # consecutive samples are staggered by time_step rows; this is
            # repeated for every possible start offset.
            window_starts = [offset + i * time_step
                             for offset in range(time_step)
                             for i in range((self.train_num - offset) // time_step)]

        train_x = np.array([feature_data[start:start + time_step] for start in window_starts])
        train_y = np.array([label_data[start:start + time_step] for start in window_starts])

        # Divide into training and validation sets (optionally shuffled).
        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
                                                              random_state=self.config.random_seed,
                                                              shuffle=self.config.shuffle_train_data)
        return train_x, valid_x, train_y, valid_y

    def get_test_data(self, return_label_data=False):
        """Cut the rows after the training part into non-overlapping test samples.

        Leading rows that do not fill a whole sample are skipped; their count
        is stored in ``self.start_num_in_test``.

        Args:
            return_label_data: when true, also return the normalised label
                columns of the test rows that are used.

        Returns:
            The array of test samples, or ``(test_x, label_data)`` when
            ``return_label_data`` is true.
        """
        feature_data = self.norm_data[self.train_num:]
        sample_interval = min(feature_data.shape[0], self.config.time_step)
        self.start_num_in_test = feature_data.shape[0] % sample_interval
        time_step_size = feature_data.shape[0] // sample_interval

        # Each sample covers sample_interval rows and consecutive samples are
        # staggered by the same amount.
        first_row = self.start_num_in_test
        test_x = [feature_data[first_row + i * sample_interval : first_row + (i + 1) * sample_interval]
                  for i in range(time_step_size)]
        if return_label_data:
            label_data = self.norm_data[self.train_num + self.start_num_in_test:, self.config.label_in_feature_index]
            return np.array(test_x), label_data
        return np.array(test_x)

# Define the logging and drawing functions

In [None]:
def load_logger(config):
    """Configure the root logger for console and/or file output and return it.

    Handlers installed by an earlier call are removed first, so calling this
    function once per experiment does not duplicate every log line.

    Args:
        config: run configuration; reads ``do_log_print_to_screen``,
            ``do_log_save_to_file`` and ``log_save_path``. When file logging
            is enabled, every public attribute of ``config`` is written to
            the log.

    Returns:
        The configured root logger.
    """
    stream_name = "load_logger.stream"
    file_name = "load_logger.file"

    logger = logging.getLogger()
    logger.setLevel(level=logging.DEBUG)

    # Drop (and close) only the handlers this function added before; handlers
    # installed by anyone else on the root logger are left alone.
    for old_handler in list(logger.handlers):
        if old_handler.get_name() in (stream_name, file_name):
            logger.removeHandler(old_handler)
            old_handler.close()

    # StreamHandler
    if config.do_log_print_to_screen:
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.set_name(stream_name)
        stream_handler.setLevel(level=logging.INFO)
        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
                                      fmt='[ %(asctime)s ] %(message)s')
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    # FileHandler
    if config.do_log_save_to_file:
        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
        file_handler.set_name(file_name)
        file_handler.setLevel(level=logging.INFO)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

        # Also record the config information to the log file, one
        # "'name': value" entry per line.
        config_lines = ["{!r}: {!r}".format(key, getattr(config, key))
                        for key in dir(config) if not key.startswith("_")]
        config_save_str = "\nConfig:\n" + "\n".join(config_lines)
        logger.info(config_save_str)

    return logger

def draw(config: "Config", origin_data: "Data", logger, predict_norm_data: np.ndarray):
    """De-normalise the predictions, log their error, plot them and return the forecast.

    Args:
        config: run configuration; reads the label settings, ``predict_day``
            and the figure options.
        origin_data: the ``Data`` object the test samples came from; supplies
            the raw data, the saved mean/std and the test-set offsets.
        logger: logger used for the error and forecast messages.
        predict_norm_data: normalised model output, one row per test row.

    Returns:
        A dict mapping every step ``0 .. predict_day - 1`` of the forecast to a
        small dict of predicted values. It is only filled when the label
        columns are exactly ``['low', 'high']``, ``['low']``, ``['high']`` or
        ``['midpoint']`` (the latter under the key ``'mid'``); otherwise it is
        empty.
    """
    label_index = config.label_in_feature_index
    label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
                                            label_index]
    # Restore the original scale with the saved mean and standard deviation.
    predict_data = predict_norm_data * origin_data.std[label_index] + \
                   origin_data.mean[label_index]
    assert label_data.shape[0]==predict_data.shape[0], "The element number in origin and predicted data is different"

    label_name = [origin_data.data_column_name[i] for i in label_index]
    label_column_num = len(config.label_columns)

    # A prediction made at row t is compared with the label at row
    # t + predict_day. Explicit end indices are used because a "[:-0]" slice
    # would be empty when predict_day is 0.
    horizon = int(config.predict_day)
    aligned_end = max(predict_data.shape[0] - horizon, 0)
    forecast = predict_data[aligned_end:]       # predictions that have no label yet

    loss = np.mean((label_data[horizon:] - predict_data[:aligned_end]) ** 2, axis=0)
    loss_norm = loss/(origin_data.std[label_index] ** 2)
    logger.info("The mean squared error of btc {} is ".format(label_name) + str(loss_norm))

    label_X = range(origin_data.data_num - origin_data.train_num - origin_data.start_num_in_test)
    predict_X = [ x + config.predict_day for x in label_X]

    # Output keys for the supported label-column combinations.
    result_keys = {
        ('low', 'high'): ('low', 'high'),
        ('low',): ('low',),
        ('high',): ('high',),
        ('midpoint',): ('mid',),
    }.get(tuple(label_name))
    res = {}
    if result_keys is not None:
        for step in range(horizon):
            res[step] = {key: np.squeeze(forecast[:, column][step])
                         for column, key in enumerate(result_keys)}

    if not sys.platform.startswith('linux'):    # The output cannot be output under Linux without a desktop.
        for i in range(label_column_num):
            plt.figure(i+1)                     # Forecast data plot
            plt.plot(label_X, label_data[:, i], label='label',color = 'b')
            plt.plot(predict_X, predict_data[:, i], label='predict',color = 'r')
            plt.title("Predict btc {} price with {}".format(label_name[i], config.used_frame))
            # The legend is added per figure and before saving, so every saved
            # image contains it.
            plt.legend(loc=0)
            logger.info("The predicted btc {} for the next {} day(s) is: ".format(label_name[i], config.predict_day) +
                  str(np.squeeze(forecast[:, i])))
            if config.do_figure_save:
                plt.savefig(config.figure_save_path+"{}predict_{}_with_{}.png".format(config.continue_flag, label_name[i], config.used_frame))
        plt.show()

    return res

In [None]:
def main(config):
    """Run one experiment: optional training followed by optional prediction.

    Args:
        config: run configuration; ``do_train`` and ``do_predict`` select the
            stages that are executed.

    Returns:
        The dict produced by ``draw`` when prediction is enabled, otherwise an
        empty dict.
    """
    logger = load_logger(config)        # set up logging for this run
    # Seed both NumPy and PyTorch: the model weights and dropout use the
    # PyTorch generator, so seeding NumPy alone does not make runs repeatable.
    np.random.seed(config.random_seed)
    torch.manual_seed(config.random_seed)
    data_gainer = Data(config)

    out_dict = {}                       # stays empty when no prediction is requested

    if config.do_train:
        train_X, valid_X, train_Y, valid_Y = data_gainer.get_train_and_valid_data()
        train(config, logger, [train_X, train_Y, valid_X, valid_Y])

    if config.do_predict:
        test_X = data_gainer.get_test_data()
        pred_result = predict(config, test_X)       # still normalised; draw restores the scale
        out_dict = draw(config, data_gainer, logger, pred_result)

    return out_dict

# Start Training

In [None]:

# Experiment: RNN trained with SGD.
con = Config()
con.model_type, con.optimizer = 'RNN', 'SGD'
out = main(con)


In [None]:
# Experiment: RNN trained with Adam.
con = Config()
con.model_type, con.optimizer = 'RNN', 'Adam'
out = main(con)

In [None]:
# Experiment: LSTM trained with Adam.
con = Config()
con.model_type, con.optimizer = 'LSTM', 'Adam'
out = main(con)

In [None]:
# Experiment: LSTM trained with SGD.
con = Config()
con.model_type, con.optimizer = 'LSTM', 'SGD'
out = main(con)

In [None]:
# Experiment: GRU trained with SGD.
con = Config()
con.model_type, con.optimizer = 'GRU', 'SGD'
out = main(con)

In [None]:
# Experiment: GRU trained with Adam.
con = Config()
con.model_type, con.optimizer = 'GRU', 'Adam'
out = main(con)