In [8]:
import tensorflow as tf
tf.random.set_seed(42)
from kerasbeats import prep_time_series, NBeatsModel
from tensorflow import keras
import numpy as np
import pandas as pd


def create_nbeat_mlp(num_columns, num_labels, lookback, horizon, hidden_units, dropout_rates, batch_size, ls=1e-2, lr=1e-3, ):
    """Build and compile a two-input Keras model: an N-BEATS branch feeding an MLP head.

    Parameters
    ----------
    num_columns : int
        Width of the second (tabular) input.
    num_labels : int
        Number of units in the final ``'action'`` Dense layer.
    lookback, horizon : int
        Passed to ``NBeatsModel``; the first input has ``lookback * horizon`` entries.
    hidden_units : sequence of int
        ``hidden_units[0]`` is passed to ``NBeatsModel`` as ``num_generic_neurons``;
        ``hidden_units[1:]`` are the widths of the Dense layers in the head.
    dropout_rates : sequence of float
        ``dropout_rates[0]`` follows the first batch normalisation;
        ``dropout_rates[i]`` follows the Dense layer built from ``hidden_units[i]``.
        Entries at index ``len(hidden_units)`` and beyond are never read.
    batch_size : int
        Passed to ``NBeatsModel``.
    ls : float
        Accepted for signature compatibility; not used anywhere in this function.
    lr : float
        Learning rate for both ``NBeatsModel`` and the Adam optimizer.

    Returns
    -------
    A compiled Keras model taking ``[time_input, tabular_input]`` (in that order),
    trained with mean squared error on the ``'action'`` output.
    """
    # Time-series branch: the N-BEATS layer stack provided by kerasbeats.
    nbeats_block = NBeatsModel(
        model_type = 'generic',
        lookback = lookback,
        horizon = horizon,
        learning_rate = lr,
        batch_size = batch_size,
        num_generic_neurons = hidden_units[0],
    )
    nbeats_block.build_layer()
    series_in = keras.layers.Input(shape = (lookback * horizon, ))
    series_features = nbeats_block.model_layer(series_in)

    # Tabular branch: concatenated with the N-BEATS output, then normalised and regularised.
    tabular_in = keras.layers.Input(shape = (num_columns, ))
    hidden = keras.layers.Concatenate()([tabular_in, series_features])
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(dropout_rates[0])(hidden)

    # MLP head: Dense -> BatchNorm -> swish -> Dropout for every remaining width.
    for depth in range(1, len(hidden_units)):
        hidden = tf.keras.layers.Dense(hidden_units[depth])(hidden)
        hidden = tf.keras.layers.BatchNormalization()(hidden)
        hidden = tf.keras.layers.Activation('swish')(hidden)
        hidden = tf.keras.layers.Dropout(dropout_rates[depth])(hidden)

    prediction = tf.keras.layers.Dense(num_labels, name = 'action')(hidden)

    model = tf.keras.models.Model(inputs = [series_in, tabular_in], outputs = prediction)
    model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = lr),
        loss = {'action' : tf.keras.losses.MeanSquaredError()},
        metrics = {'action' : tf.metrics.MeanSquaredError(name = 'mse')},
    )
    return model

In [15]:
import lightgbm as lgb
import pandas as pd
import numpy as np
from itertools import islice
import tscv

def generate_label(df, threshold = 0.002, target_col = 'target_15m'):
    """Add a three-class ``label`` column derived from a numeric target column.

    The labels are:
      * 0 -- the target is strictly between ``-threshold`` and ``threshold``,
             or is NaN (NaN fails both comparisons);
      * 1 -- the target is ``<= -threshold``;
      * 2 -- the target is ``>= threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``target_col``. It is modified IN PLACE: the ``label``
        column is created or overwritten.
    threshold : float
        Absolute cut-off applied symmetrically around zero.
    target_col : str
        Name of the column to label. Defaults to ``'target_15m'``, the column
        this function originally hard-coded, so existing calls behave the same.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    target = df[target_col]
    df['label'] = 0
    df.loc[target <= -threshold, 'label'] = 1
    # Assigned last on purpose: if both conditions hold (only possible when
    # threshold <= 0) class 2 wins, exactly as before.
    df.loc[target >= threshold, 'label'] = 2
    return df

def get_na_features(df, train_features):
    """Group the features that contain missing values by their number of nulls.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns.
    train_features : list
        Column labels to inspect.

    Returns
    -------
    dict
        ``{null_count: [feature, ...]}`` for every feature in ``train_features``
        that has at least one null, with keys in ascending order. Features
        without nulls are omitted.
    """
    null_counts = df[train_features].isnull().sum()
    # Keep only features with missing values, ordered by how many they have.
    null_counts = null_counts[null_counts > 0].sort_values()

    feat_groups = {}
    for feat, cnt in null_counts.items():
        feat_groups.setdefault(cnt, []).append(feat)
    return feat_groups

In [18]:
def normalize_columns_infer(df, means, stds):
    """Standardise columns of ``df`` with precomputed statistics.

    For every label in ``means.index`` the column of the same name is replaced
    by ``(column - mean) / std``, where ``std`` is looked up in ``stds`` under
    the same label. Columns not listed in ``means`` are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to normalise. It is modified IN PLACE.
    means, stds : pandas.Series
        Per-feature statistics indexed by column name; ``stds`` must contain
        every label present in ``means``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    for column in means.index.tolist():
        shift, scale = means.loc[column], stds.loc[column]
        df[column] = (df[column] - shift) / scale
    return df

In [17]:
from pathlib import Path
directory = 'spot_data_11_25'
date = '11_25'

# Path.mkdir() returns None, so binding its result left `lib` as None.
# Keep the Path object and create the directory as a separate step.
lib = Path(f"../output/{directory}")
lib.mkdir(parents=True, exist_ok=True)

window = 30
file = f'target_{window}m_feature.csv'
# The first CSV column becomes the index; feature names are read from it below.
corr = pd.read_csv(f'../output/feature_corr/{file}', header = 0, index_col = 0)

# Take the index labels of the first 50 rows as the training feature list.
# NOTE(review): presumably the file is already sorted by correlation strength -- confirm.
train_features = corr.iloc[:50].index.tolist()

In [13]:
train = False
df = pd.read_feather('../data/df_btc_with_features_5m_spot.feather')

# Keep only the BTCUSDT rows and put them in chronological order.
df = df.loc[df['token'] == 'BTCUSDT'].reset_index(drop = True)
df = df.sort_values(by='open_time', ignore_index=True)

# close_5m is the close of the next row (NaN on the last row); close_t is the current close.
df['close_5m'] = df['close'].shift(-1)
df['close_t'] = df['close'].copy()

start_time = df['open_time'].min()
end_time = df['open_time'].max()

# Chronological split on the unique timestamps: first 70% / next 20% / last 10%.
dates = df['open_time'].unique()
n = len(dates)
train_idx = int(0.7 * n)
valid_idx = int(0.9 * n)
train_end = dates[train_idx]
valid_end = dates[valid_idx]

train_df = df.loc[df['open_time'] < train_end].reset_index(drop=True)
valid_df = df.loc[(train_end <= df['open_time']) & (df['open_time'] < valid_end)].reset_index(drop=True)

# The validation slice is folded into the training set. Both frames carry a
# fresh 0..k index, so without ignore_index the result would contain duplicate
# index labels and any later label-based lookup (.loc) would return extra rows.
train_df = pd.concat([train_df, valid_df], axis = 0, ignore_index = True)

test_df = df.loc[(df['open_time'] >= valid_end)].reset_index(drop=True)

# From here on `valid_df` is an independent copy of the held-out test slice.
valid_df = test_df.copy()

# One integer group id per calendar day of `open_time`, numbered in order of
# first appearance. Truncating each timestamp to midnight identifies the same
# day as a "day_month_year" string key would, without building three strings
# per row. train_df is selected by comparisons on open_time, so it has no NaT.
groups = pd.factorize(train_df['open_time'].dt.normalize())[0]

# NOTE(review): presumably group_gap is the number of day-groups purged between
# the training and validation part of each fold -- confirm against tscv.
cv = tscv.PurgedGroupTimeSeriesSplit(
    n_splits=5,
    group_gap=31,
)

from joblib import load, dump
# Load the normalisation statistics and the feature list from the output
# directory, then standardise the evaluation frame with them.
# NOTE(review): `date` is also assigned '11_25' in another cell and is not used
# in this cell; the f-prefix on these two literals has no placeholders.
date = f'11_23'
directory = f'spot_data_11_25'
lookback = 5
horizon = 100
# NOTE(review): joblib.load unpickles the file -- only load artifacts you produced yourself.
# NOTE(review): means come from a 'means_nbeats_huber_*' file while std comes from a
# 'std_return_nbeats_*' file -- confirm both belong to the same training run.
means = load(f'../output/{directory}/means_nbeats_huber_l={lookback}_h={horizon}.joblib')
# NOTE(review): unlike the two sibling paths, this name has no '.' before 'joblib' --
# confirm it matches the name the artifact was actually saved under.
std = load(f'../output/{directory}/std_return_nbeats_l={lookback}_h={horizon}joblib')
train_features_test = load(
    f'../output/{directory}/train_features_test_return_nbeats_l={lookback}_h={horizon}.joblib')

# normalize_columns_infer writes into the frame it receives; valid_df is a copy
# of test_df at this point, so test_df keeps its raw values.
valid_df = normalize_columns_infer(valid_df, means, std)

# Hyper-parameters for create_nbeat_mlp.
lookback = 5
horizon = 100
batch_size = 4096
params = dict(
    num_columns = len(train_features_test),
    num_labels = 1,
    lookback = lookback,
    horizon = horizon,
    batch_size = batch_size,
    # hidden_units[0] sizes the N-BEATS block; the remaining entries size the Dense head.
    hidden_units = [300, 200, 448, 256],
    # Six rates are listed, but create_nbeat_mlp only reads indices
    # 0 .. len(hidden_units) - 1, so the last two are not used.
    dropout_rates = [
        0.42409238408801436,
        0.10431484318345882,
        0.49230389137187497,
        0.32024444956111164,
        0.2716856145683449,
        0.4379233941604448,
    ],
    # Accepted by create_nbeat_mlp but not used in its body.
    ls = 0,
    lr = 1e-3,
)



