In [1]:
# this is still a very early version, and still working on making a framework that will get basic things going. Will enhance and polish all areas afterwards.

In [2]:
# import libraries
from marketstackAPI import Marketstack
import pandas as pd
import numpy as np
import cufflinks as cf
import ta
import holidays
import matplotlib as plt
import lightgbm as lgb

from datetime import datetime
from datetime import timedelta
from imblearn.over_sampling import SMOTE
from IPython.core.display import display, HTML

In [3]:
# jupyter notebook settings and chart size configs
display(HTML("<style>.container { width:100% !important; }</style>"))
plt.rcParams['figure.figsize'] = [12, 5]
plt.rcParams['figure.dpi'] = 200
pd.options.plotting.backend = "plotly"

In [4]:
# initialize and set parameters
MS = Marketstack() # requires API key from Marketstack with basic plan to get 10 years worth of data
cf.set_config_file(theme='henanigans',sharing='public',offline=True)

In [5]:
def raw_data_preprocessing(raw_data):
    """
    Clean raw_data by removing extra columns, renaming columns, order by date in descending order, reset index number.
    this data format will be used as the standard format for all other feature engineering related function calls.
    
    Parameters
    ----------
    raw_data : pandas dataframe that contains ['date','adj_high','adj_low','adj_close','adj_open','adj_volume'] columns, ordered by date in ascending order.
    
    Return:
    ----------
    standard_data: pandas dataframe that contains ['date','high','low','close','open','volume'] columns, ordered by date in descending order.
    
    """
    data = raw_data[:]
    data = data[['date','adj_high','adj_low','adj_close','adj_open','adj_volume']]
    data.columns = ['date','high','low','close','open','volume']
    data = data[::-1]
    data.reset_index(inplace=True, drop=True)
    return data

In [6]:
def get_ta_indicators(standard_data, prefix = ''):
    """
    Compute technical indicators for every period, each row within standard_data is a period.
    
    Parameters
    ----------
    standard_data : pandas dataframe that contains ['date','high','low','close','open','volume'] columns, ordered by date in descending order.
    prefix: string that will be concatennated to before all technical indicators names
    
    Return:
    ----------
    data: pandas dataframe that contains computed technical indicators with corrsponding name
    
    """
    data = standard_data[:]
    df = pd.DataFrame()
    df.insert(0, prefix+'_stochrsi_14' if prefix else 'stochrsi_14', ta.momentum.stochrsi(close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_mfi_14' if prefix else 'mfi_14', ta.volume.money_flow_index(high = data.high, low = data.low, close = data.close, volume= data.volume)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_adx_14' if prefix else 'adx_14', ta.trend.adx(high = data.high, low = data.low, close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_adx_neg_14' if prefix else 'adx_neg_14', ta.trend.adx_neg(high = data.high, low = data.low, close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_adx_pos_14' if prefix else 'adx_pos_14', ta.trend.adx_pos(high = data.high, low = data.low, close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_aroon_up_25' if prefix else 'aroon_up_25', ta.trend.aroon_up(close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_aroon_down_25' if prefix else 'aroon_down_25', ta.trend.aroon_down(close = data.close)/100) # range 0 to 100 rescaled to 0 to 1
    df.insert(0, prefix+'_aroon_25' if prefix else 'aroon_25', (ta.trend.aroon_up(close = data.close) - ta.trend.aroon_down(close = data.close))/100) # range 0 to 100 rescaled to 0 to 1
    
    return df

In [7]:
def get_percent_changes(standard_data, prefix = ''):
    """
    Compute basic % changes
    
    Parameters
    ----------
    standard_data : pandas dataframe that contains ['date','high','low','close','open','volume'] columns, ordered by date in descending order.
    prefix: string that will be concatennated to before all technical indicators names
    
    Return:
    ----------
    data: pandas dataframe that contains computed % changes indicators with corrsponding name
    
    """
    data = standard_data[:]
    df = pd.DataFrame()
    #add volume % change from yesterday to today
    df.insert(0,'volume_change',data.volume/data.volume.shift(1)-1)
    #add price % change from yesterday to today
    df.insert(0,'price_change',data.close/data.close.shift(1)-1)
    return df

In [8]:
def get_target_variable(standard_data):
    """
    Compute target variable.
    the target variable indicates three classes.
    2 : next day is going up significantly
    0: next day is going down significantly
    1 : no significant movement for the next day.
    
    How significant change is defined using more than 1% change at the moment. could be changing to something else.
    
    
    Parameters
    ----------
    standard_data : pandas dataframe that contains ['date','high','low','close','open','volume'] columns, ordered by date in descending order.
    
    Return:
    ----------
    data: pandas dataframe that contains target variable
    
    """
    data = standard_data[:]
    #creating Y
    #calculate daily % change using daily close using the NEXT day close / today close
    df = pd.DataFrame()
    target = data.close.shift(-1)/data.close-1
    target[target > 0.01] = 1
    target[target < -0.01] = -1
    target[(target < 1) & (target > -1)] = 0
    target += 1
    df.insert(0,'target', target)
    df.insert(1,'change', data.close.shift(-1)/data.close-1)
    return df

In [9]:
# remove common rows with nan from full_data and target and return new dataset
def remove_nan(full_data, target):
    to_keep = [not x for x in np.array(list(map(any,full_data.isna().values))) | np.array(list(map(any,target.isna().values)))]
    full_data = full_data[to_keep]
    target = target[to_keep]
    return full_data, target

In [10]:
def eval_strategy(results, data):
    # the evaluation process would be a simulation of trading the stocks at the close and always long/short as much as possible and liquidate the next day
    # assuming no commission per trade, and orders always fill at the close.
    max_balance = 10000
    eval_data = results.merge(data, left_index=True, right_index=True)
    beginning_balance = [10000]
    beginning_cash = [10000]
    shares_owned = [0]
    ending_balance = [10000]
    ending_cash = [10000]
    draw_down = [0]
    actions = ['liquidate']
    for idx, row in eval_data.iterrows():
        beginning_balance.append(ending_balance[-1])
        ending_balance.append(ending_cash[-1]+(shares_owned[-1])*row.close)
        beginning_cash.append(ending_cash[-1])
        ending_cash.append(ending_cash[-1])
        shares_owned.append(shares_owned[-1])
        
        #liquidate
        if row.predictions.argmax() != 2:
            ending_cash[-1] += shares_owned[-1] * row.close
            shares_owned[-1] = 0
            action = 'liquidate'
        # if long
        if row.predictions.argmax() == 2:
            shares_owned[-1] += np.floor(ending_cash[-1]/row.close)
            ending_cash[-1] -= (ending_cash[-1]/row.close) * row.close
            action = 'long'
        # if short
        if row.predictions.argmax() == 0:
            shares_owned[-1] -= np.floor(ending_cash[-1]/row.close)
            ending_cash[-1] -= -(ending_cash[-1]/row.close) * row.close
            action = 'short'
        #calc
        max_balance = max(max_balance,ending_balance[-1])
        draw_down.append(max_balance/ending_balance[-1] - 1)
        actions.append(action)
    df = pd.DataFrame()
    df.insert(0, 'beginning_balance', beginning_balance)
    df.insert(1, 'beginning_cash', beginning_cash)
    df.insert(2, 'shares_owned', shares_owned)
    df.insert(3, 'ending_cash', ending_cash)
    df.insert(4, 'ending_balance', ending_balance)
    df.insert(5, 'draw_down', draw_down)
    df.insert(6, 'actions', actions)
    return df

In [11]:
def eval_buy_and_hold(results, data):
    # the evaluation process would be a simulation of trading the stocks at the close and always long/short as much as possible and liquidate the next day
    # assuming no commission per trade, and orders always fill at the close.
    max_balance = 10000
    eval_data = results.merge(data, left_index=True, right_index=True)
    beginning_balance = [10000]
    beginning_cash = [10000]
    shares_owned = [0]
    ending_balance = [10000]
    ending_cash = [10000]
    draw_down = [0]
    actions = ['liquidate']
    for idx, row in eval_data.iterrows():
        beginning_balance.append(ending_balance[-1])
        ending_balance.append(ending_cash[-1]+(shares_owned[-1])*row.close)
        beginning_cash.append(ending_cash[-1])
        ending_cash.append(ending_cash[-1])
        shares_owned.append(shares_owned[-1])
        
        #liquidate
        ending_cash[-1] += shares_owned[-1] * row.close
        shares_owned[-1] = 0
        action = 'liquidate'
        # long
        
        shares_owned[-1] += np.floor(ending_cash[-1]/row.close)
        ending_cash[-1] -= (ending_cash[-1]/row.close) * row.close
        action = 'long'

        #calc
        max_balance = max(max_balance,ending_balance[-1])
        draw_down.append(max_balance/ending_balance[-1] - 1)
        actions.append(action)
    df = pd.DataFrame()
    df.insert(0, 'beginning_balance', beginning_balance)
    df.insert(1, 'beginning_cash', beginning_cash)
    df.insert(2, 'shares_owned', shares_owned)
    df.insert(3, 'ending_cash', ending_cash)
    df.insert(4, 'ending_balance', ending_balance)
    df.insert(5, 'draw_down', draw_down)
    df.insert(6, 'actions', actions)
    return df

In [12]:
def plot(standard_data):
    """
    Draw interactive candle stick chart OHLC Volume
    
    Parameters
    ----------
    standard_data : pandas dataframe that contains ['date','high','low','close','open','volume'] columns.
    
    """
    qf = cf.QuantFig(standard_data,legend='bottom')
    qf.add_volume()
    qf.iplot()

In [14]:
# get raw data
raw_data = MS.get('NDAQ')

In [15]:
# runtimewarnings are produced from ta library but it's nothing to worry about for this project, will need to figure out a way to suppress this warning message.
# data prep steps then
# drop rows with nan
# naturally the last row contains nan since we don't have info from tomorrow. so we are also dropping the last row.
# but in production we will want to keep the last row so we can use it to make prediction
# for modeling and evaluation purposes it's not useful
# after this step the data is ready to use as training dataset
data = raw_data_preprocessing(raw_data)
target = get_target_variable(data)
indicators = get_ta_indicators(data, 'daily')

# concat/merge datasets to create full_data
full_data = indicators

# last step is to remove rows with nan. i.e. first few rows that don't have enough days of data to compute averages etc, and the last row without future data to compute the targer.
full_data, target = remove_nan(full_data, target)


invalid value encountered in double_scalars


invalid value encountered in double_scalars



In [70]:
# Since this is a timeseries dataset and because of my personal trading experience I'm claming/assuming the underlying relationship between features and target variables isn't stationary, 
# thus the traning method will be in walk-forward style instead of cross-validation; and for each iteration the model isn't going to using everydata available since they aren't as relevant.
# will try to play with weights in the future to see if applying less weights to "outdated" data will help the model.

def train_and_eval(full_data, target):
    training_window_size = 600 # days of data to train the model for each iteration. using about 3 years of data
    predict_window_size  = 10  # days of data used to test and eval the model for each iteration. Using about 2 weeks of data 
                               # ideally the predict_window_size should be set to 1, but that would take too long to train.
                               # maybe I would try it when I have a good model with good parameters
    
    # check full_data len is more than training_window_size + predict_window_size if false throw error
    assert len(full_data) > training_window_size + predict_window_size, "full_data lenght is less than training_window_size + predict_window_size"
    predictions = []
    truths = []
    prediction_results = target[training_window_size:]
    for i in range(training_window_size,len(full_data),predict_window_size):
        # setup train and test data
        train_x = full_data[i-training_window_size:i]
        test_x  = full_data[i:i+predict_window_size]
        train_y = target[i-training_window_size:i]
        test_y  = target[i:i+predict_window_size]
        
        # oversample trainning data to balance the dataset
        oversample = SMOTE()
        train_x, train_y = oversample.fit_resample(train_x, train_y.target)
        
        # create lgb.Dataset for both train and test for lightgbm library use
        train_data = lgb.Dataset(train_x, label=train_y, feature_name=list(train_x.columns))
        validation_data = lgb.Dataset(test_x, label=test_y.target, feature_name=list(test_x.columns))
        
        # setup lightgbm parameters
        param = {'metric': 'multi_logloss', 'objective': 'multiclass', 'num_class':3}
        param['max_depth'] = 30
        param['num_leaves'] = 5
        param['min_data_in_leaf'] = 20
        param['min_sum_hessian_in_leaf'] = 1e-3
        param['bagging_fraction'] = 0.9
        param['bagging_freq'] = 5
        param['bagging_seed'] = 3
        param['feature_fraction'] = 0.9
        param['feature_fraction_bynode'] = 0.9
        param['feature_fraction_seed'] = 2
        param['lambda_l1'] = 0.1
        param['lambda_l2'] = 0.1
        
        num_round = 100
        bst = lgb.train(param, train_data, num_round, valid_sets=[validation_data], early_stopping_rounds=5,verbose_eval=False)
        pred = bst.predict(test_x, num_iteration_predict = bst.best_iteration)
        
        predictions.extend(pred)
        truths.extend(test_y)
    prediction_results.insert(2, 'predictions', predictions)
    
    return bst, prediction_results

In [69]:
model, prediction_results = train_and_eval(full_data, target)


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1943
[LightGBM] [Info] Number of data points in the train set: 912, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1944
[LightGBM] [Info] Number of data points in the train set: 909, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1942
[LightGBM] [Info] Number of data points in the train set: 918, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 975, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 990, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1005, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] 


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1047, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1041, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1047, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1086, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1098, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1095, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1101, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1107, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1101, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1098, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1101, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1098, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1059, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1065, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1077, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in param

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1134, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1137, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1134, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1137, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1146, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1158, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in param

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1266, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1272, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1269, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1293, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1287, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1281, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1341, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1347, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1350, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1344, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1335, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1314, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1293, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1287, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1272, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1215, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1215, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1218, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1167, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1146, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1128, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1119, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1116, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 1101, number of used features: 8
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] S


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument


Found `early_stopping_round` in params. Will use it instead of argument



In [72]:
buy_and_hold_result = eval_buy_and_hold(prediction_results, data)
strategy_result = eval_strategy(prediction_results, data)

print('Buy and Hold: Max Drawdown: ', str(round(100*max(buy_and_hold_result.draw_down),2))+'%')
print('LGB Strategy: Max Drawdown: ', str(round(100*max(strategy_result.draw_down),2))+'%')
df = pd.DataFrame([buy_and_hold_result.ending_balance,strategy_result.ending_balance]).T
df.columns = ['buy and hold', 'lgb strategy']
df.plot()

Buy and Hold: Max Drawdown:  63.54%
LGB Strategy: Max Drawdown:  33.65%


In [74]:
plot(data)

In [21]:
# # holiday info
# min_year = int(min(data['date'])[:4])-2
# max_year = int(max(data['date'])[:4])+2
# min_date = str(min_year)+'-01-01'
# max_date = str(max_year)+'-12-31'
# dates = pd.date_range(min_date,max_date).values
# holidays = holidays.UnitedStates(years=range(min_year,max_year))