In [26]:
import tqdm
import numpy as np
import pandas as pd
from datetime import datetime
import pandas_datareader as pdr

In [3]:
# add new features
def baseline_features(df, ma_windows=(8, 21, 50, 200), rsi_period=14,
                      macd_fast=12, macd_slow=26, macd_signal=9):
    """Add price/volume indicator columns to ``df`` in place and return it.

    Columns written, in this order:

    * ``open_sub_close`` -- ``Open - Close``
    * ``high_div_low``   -- ``High / Low``
    * ``close_ma<w>``    -- rolling mean of ``Close`` for each ``w`` in ``ma_windows``
    * ``vol_ma<w>``      -- rolling mean of ``Volume`` for each ``w`` in ``ma_windows``
    * ``RSI``            -- ``100 - 100 / (1 + rs)`` where ``rs`` is the ratio of the
      exponentially weighted mean gain to mean loss (``com = rsi_period - 1``)
    * ``MACD``           -- ``macd_signal``-span EMA of (fast EMA - slow EMA) of ``Close``.
      NOTE: this is the smoothed series usually called the MACD *signal* line,
      not the raw fast-minus-slow difference; the column name is kept so
      existing consumers are unaffected.
    * ``OBV``            -- cumulative sum of ``sign(Close.diff()) * Volume``

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.
        The frame is modified in place.
    ma_windows : iterable of int, default (8, 21, 50, 200)
        Rolling window lengths used for both the close and volume averages.
    rsi_period : int, default 14
        Smoothing period for the RSI (the original hard-coded ``com=13``).
    macd_fast, macd_slow, macd_signal : int, default 12, 26, 9
        EMA spans used for the MACD column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If any required column is missing. Raised before anything is written,
        so ``df`` is never left partially updated.
    """
    required = ('Open', 'High', 'Low', 'Close', 'Volume')
    missing = [col for col in required if col not in df]
    if missing:
        raise KeyError(
            'baseline_features: missing required column(s): {}'.format(missing)
        )

    close = df['Close']
    volume = df['Volume']

    df['open_sub_close'] = df['Open'] - close
    df['high_div_low'] = df['High'] / df['Low']

    # Two separate loops keep the original column order:
    # every close_ma* column first, then every vol_ma* column.
    for window in ma_windows:
        df['close_ma{}'.format(window)] = close.rolling(window).mean()
    for window in ma_windows:
        df['vol_ma{}'.format(window)] = volume.rolling(window).mean()

    # RSI: split the price change into gains and (positive) losses, smooth each.
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -1 * delta.clip(upper=0)
    avg_gain = gains.ewm(com=rsi_period - 1, adjust=False).mean()
    avg_loss = losses.ewm(com=rsi_period - 1, adjust=False).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD: smoothed difference between the fast and slow EMAs of Close.
    fast_ema = close.ewm(span=macd_fast, adjust=False).mean()
    slow_ema = close.ewm(span=macd_slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    df['MACD'] = macd_line.ewm(span=macd_signal, adjust=False).mean()

    # OBV: the first diff is NaN, so fillna(0) makes the running total start at 0.
    df['OBV'] = (np.sign(delta) * volume).fillna(0).cumsum()

    return df

In [21]:
# Load bnb.csv from the working directory and preview its first five rows.
bnb = pd.read_csv(filepath_or_buffer='bnb.csv')
bnb.head(n=5)

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target
0,1523956260,0,7.0,12.4195,12.4195,12.4101,12.4195,794.7,12.411386,-0.004366
1,1523956320,0,33.0,12.4195,12.4195,12.4001,12.415,1117.73,12.407532,-0.00394
2,1523956380,0,32.0,12.415,12.4195,12.4003,12.41,1062.37,12.401607,-0.003153
3,1523956440,0,38.0,12.4006,12.41,12.3931,12.4,2259.55,12.399251,-0.003429
4,1523956500,0,79.0,12.4,12.421,12.393,12.3942,7113.37,12.406144,-0.002187


In [29]:
# Date range covered by bnb, taken from its first and last rows
# (assumes the rows are in chronological order -- TODO confirm).
# The 'timestamp' column holds epoch seconds; it is selected by name rather
# than by position so a change in column order cannot silently pick a
# different column.
start = pd.to_datetime(bnb['timestamp'].iloc[0], unit='s')
end = pd.to_datetime(bnb['timestamp'].iloc[-1], unit='s')

In [30]:
def advance_data():
    """Fetch the external market series used alongside the BNB data.

    Each ticker below is requested with ``pdr.get_data_yahoo(ticker, start, end)``,
    where ``start`` and ``end`` are the notebook-level values defined in an
    earlier cell. Tickers are requested in the order listed.

    Returns
    -------
    dict
        Maps a short name (``'gc'``, ``'sil'``, ..., ``'bkch'``) to whatever
        ``pdr.get_data_yahoo`` returned for that ticker. Returning the results
        is required for the downloads to be usable: without it every fetched
        series is discarded when the function exits.
    """
    tickers = {
        # commodity
        'gc': 'GC=F',          # gold
        'sil': 'SI=F',         # silver
        'oil': 'CL=F',         # crude oil
        # dollar and bond
        'dollar': 'EURUSD=X',  # EUR/USD
        'bond': '^TNX',        # 10-year treasury bond
        # futures 22
        'wheat': 'ZWH22.CBT',  # wheat
        'corn': 'ZCH22.CBT',   # corn
        'soy': 'ZSH22.CBT',    # soybean
        'oat': 'ZOH22.CBT',    # oat
        'usd': 'DXH22.NYB',    # USD
        # stock market
        'spy': 'SPY',          # SPY
        'dia': 'DIA',          # DIA
        'qqq': 'QQQ',          # QQQ
        'iwm': 'IWM',          # russell
        'vix': '^VIX',         # VIX
        'bkch': 'BKCH',        # blockchain etf
    }

    # dicts preserve insertion order, so the request order matches the table.
    return {
        name: pdr.get_data_yahoo(symbol, start, end)
        for name, symbol in tickers.items()
    }

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-17,1348.099976,1339.000000,1348.000000,1347.199951,409,1347.199951
2018-04-18,1354.199951,1343.500000,1343.500000,1351.199951,45,1351.199951
2018-04-19,1355.199951,1341.599976,1348.599976,1346.800049,387,1346.800049
2018-04-20,1343.400024,1335.300049,1342.500000,1336.699951,310,1336.699951
2018-04-23,1334.400024,1322.500000,1334.000000,1322.500000,68,1322.500000
...,...,...,...,...,...,...
2021-09-16,1792.900024,1743.900024,1792.900024,1754.599976,1645,1754.599976
2021-09-17,1763.000000,1749.400024,1758.800049,1749.400024,581,1749.400024
2021-09-20,1761.800049,1750.900024,1751.800049,1761.800049,185,1761.800049
2021-09-21,1776.000000,1759.000000,1762.199951,1776.000000,106,1776.000000
