In [1]:
%run ../../utils/commonImports.py
%run ../../utils/tradingImports.py
%matplotlib inline

from base import supres

# Load Data

In [2]:
import os

# Root of the shared Dropbox project folder; the cells below read
# <dropbox_dir>/sqlite/... and write <dropbox_dir>/result/...
# Set the KRYPL_DROPBOX_DIR environment variable to point at another machine's
# copy (e.g. 'D:\\Dropbox\\My work\\krypl-project') instead of editing this cell.
dropbox_dir = os.environ.get('KRYPL_DROPBOX_DIR', '/Users/david.vesely/Dropbox/My work/krypl-project')
def read_data(pair):
    """Load the chart data for ``pair`` and add an ``ohlc4`` column.

    The frame comes from ``load_trading_data`` called on
    ``<dropbox_dir>/sqlite/ploniex-chart-data/<pair>.db`` with ``'chart_data'``,
    ``from_date='2015-01-01'`` and ``period='30min'``. ``ohlc4`` is the mean of
    the ``open``, ``close``, ``high`` and ``low`` columns.

    Returns the loaded frame with the extra column attached.
    """
    db_path = os.path.join(dropbox_dir, 'sqlite', 'ploniex-chart-data', f'{pair}.db')
    candles = load_trading_data(db_path, 'chart_data', from_date='2015-01-01', period='30min')
    # Keep the original summation order so the float results stay identical.
    price_sum = candles['open'] + candles['close'] + candles['high'] + candles['low']
    candles['ohlc4'] = price_sum / 4
    return candles

# Core functions

In [3]:
def get_supports(data, window_size):
    """Detect supports on the ``ohlc4`` series and compute support-to-support returns.

    Parameters
    ----------
    data : DataFrame with an ``ohlc4`` column.
    window_size : passed straight through to ``supres``.

    Returns
    -------
    DataFrame with columns ``index`` and ``support`` (the first element of the
    pair returned by ``supres``) plus ``return1``: the relative change of each
    support versus the previous one, with 0 for the first support.
    """
    found, _ = supres(data['ohlc4'].values, window_size)
    supports = pd.DataFrame(found, columns=['index', 'support'])
    levels = supports['support'].values

    # Pre-size the result to the number of supports: the first entry has no
    # predecessor and stays 0. Unlike prepending a 0 to the ratio array, this
    # also works when no supports were found (the old code then produced a
    # length-1 array that could not be assigned to a 0-row frame).
    returns = np.zeros(levels.shape[0])
    returns[1:] = (levels[1:] / levels[:-1]) - 1
    supports['return1'] = returns
    return supports

In [4]:
def labels(supports, ohlc, target, stop_loss):
    """Label each support by which exit level is reached first after it.

    For a support at row ``index`` the entry price is the ``close`` of row
    ``index + 1``. Rows from ``index + 2`` onward are scanned in order, and the
    first one whose ``low`` or ``high`` crosses an exit level decides the label.

    Parameters
    ----------
    supports : DataFrame with an ``index`` column holding row positions in ``ohlc``.
    ohlc : DataFrame with ``low``, ``high`` and ``close`` columns.
    target : relative gain (e.g. 0.05) that counts as a hit on the ``high``.
    stop_loss : relative loss (e.g. -0.05) that counts as a stop on the ``low``.

    Returns
    -------
    pd.Series of ``'stop'``, ``'target'`` or ``'unknown'``, one per support, in
    the order of ``supports``. ``'unknown'`` means neither level was reached
    before the data ended, including when the support sits on the last row and
    no entry candle exists.
    """
    low_c, high_c, close_c = 0, 1, 2
    np_ohlc = ohlc[['low', 'high', 'close']].values
    n_rows = np_ohlc.shape[0]

    def get_label(start_i, price):
        for j in range(start_i, n_rows):
            # The stop is checked first, so a candle that spans both levels
            # counts as a stop.
            if (np_ohlc[j, low_c] / price) - 1 <= stop_loss:
                return 'stop'
            if (np_ohlc[j, high_c] / price) - 1 >= target:
                return 'target'
        return 'unknown'

    outcomes = []
    for support_i in supports['index'].values:
        entry = int(support_i + 1)
        if entry >= n_rows:
            # Support on the last candle: there is no entry price to read.
            # Previously this raised IndexError.
            outcomes.append('unknown')
            continue
        outcomes.append(get_label(entry + 1, np_ohlc[entry, close_c]))

    return pd.Series(outcomes)

In [5]:
def target_hits(supports, _return):
    """Count how often supports with a low enough ``return1`` hit the target.

    Only rows with ``return1 <= _return`` are considered. Returns a pair
    ``(count, share)``: the number of those rows labelled ``"target"`` and that
    number divided by the total count of labelled rows in the selection.
    Returns ``(0., 0.)`` when no selected row is labelled ``"target"``.
    """
    selected = supports.loc[supports['return1'] <= _return, 'label']
    counts = selected.value_counts()
    if 'target' not in counts.index:
        return 0., 0.

    n_target = counts['target']
    return n_target, n_target / counts.sum()

# Parallelize

In [6]:
from multiprocessing import Pool
import time
from tqdm import *

def run_parallel(f, args, n_process=10):
    """Apply ``f`` to every item of ``args`` in a process pool with a progress bar.

    Parameters
    ----------
    f : callable taking a single argument (one item of ``args``).
    args : iterable of arguments; it is materialized into a list so the
        progress bar knows the total.
    n_process : number of worker processes.

    Returns
    -------
    list of results in completion order (``imap_unordered``), NOT in the order
    of ``args``. Callers must carry their own keys in each result.

    NOTE(review): ``f`` and the globals it reads must be reachable from the
    worker processes. Functions defined in notebook cells may not be when the
    multiprocessing start method is 'spawn'; confirm on your platform.
    """
    all_args = list(args)
    results = []
    with Pool(processes=n_process) as p:
        # Iterating over the bar advances it by itself. The previous extra
        # pbar.update() call per item made the displayed progress wrong.
        with tqdm_notebook(p.imap_unordered(f, all_args), leave=False, total=len(all_args)) as pbar:
            for x in pbar:
                results.append(x)

    return results

In [7]:
def dict_from_list(l):
    """Turn flat rows ``(k1, ..., kn, leaf_key, value)`` into a nested dict.

    Every element of a row except the last two is a level of nesting; the
    second-to-last element is the key in the innermost dict and the last
    element is the value stored there. A later row with the same full key path
    overwrites an earlier one.
    """
    nested = {}
    for row in l:
        node = nested
        # Walk down the key path, creating intermediate dicts as needed.
        for key in row[:-2]:
            node = node.setdefault(key, {})
        node[row[-2]] = row[-1]
    return nested

# Experiment setup

In [8]:
# Pairs to evaluate: first column of pairs_train.tsv (the file has no header row).
train_coins = pd.read_csv('pairs_train.tsv', header=None)[0].tolist()
# Window sizes handed to supres via get_supports.
window_sizes = [4, 6, 8, 10, 12, 24, 30, 48]
# 13 target levels 0.01 .. 0.13; the leading 0 produced by linspace is dropped.
targets = list(np.linspace(0, 0.13, 14))[1:]
# 10 stop-loss levels -0.01 .. -0.10 (negative relative moves).
stop_losses = list(-np.linspace(0, 0.1, 11))[1:]
# 10 thresholds -0.01 .. -0.10; target_hits keeps supports with return1 <= threshold.
# NOTE: these exact float objects are later used as dict keys, so do not recompute them differently.
returns = list(-np.linspace(0, 0.1, 11))[1:]

# Size of the full parameter grid (one target_hits_f call per combination).
loops = len(train_coins) * len(window_sizes) * len(targets) * len(stop_losses) * len(returns)
loops

312000

# Calc Supports

In [9]:
# Load every training pair once; the worker functions below read this dict as a global.
data_dict = {pair: read_data(pair) for pair in train_coins}

In [10]:
from itertools import product

def supports_f(args):
    """Worker for ``run_parallel``: compute supports for one ``(pair, window_size)``.

    Returns ``(pair, window_size, supports)`` so the unordered results can be
    keyed again by ``dict_from_list``.
    """
    pair, window_size = args
    supports = get_supports(data_dict[pair], window_size)
    return pair, window_size, supports

# One task per (pair, window_size); results come back unordered but carry their keys.
args = product(train_coins, window_sizes)
results = run_parallel(supports_f, args)
# Nest as supports_dict[pair][window_size] -> supports frame, then save for later sections.
supports_dict = dict_from_list(results)
save_model(supports_dict, 'supports.pkl')

HBox(children=(IntProgress(value=0, max=240), HTML(value='')))



# Calc support labels

In [18]:
# Reload the supports written to 'supports.pkl' so this section can start from the saved file.
supports_dict = load_model('supports.pkl')

In [19]:
from itertools import product

def labels_f(args):
    """Worker for ``run_parallel``: label the supports of one parameter combination.

    ``args`` is ``(pair, window_size, target, stop_loss)``. Returns those four
    keys followed by the label series from ``labels`` so the unordered results
    can be keyed again by ``dict_from_list``.
    """
    pair, window_size, target, stop_loss = args
    ohlc = data_dict[pair]
    pair_supports = supports_dict[pair][window_size]
    support_labels = labels(pair_supports, ohlc, target, stop_loss)
    return pair, window_size, target, stop_loss, support_labels

# One task per (pair, window_size, target, stop_loss).
args = product(train_coins, window_sizes, targets, stop_losses)
results = run_parallel(labels_f, args)
# Nest as labels_dict[pair][window_size][target][stop_loss] -> label series, then save.
labels_dict = dict_from_list(results)
save_model(labels_dict, 'labels.pkl')

HBox(children=(IntProgress(value=0, max=31200), HTML(value='')))



# Calc Hits

In [20]:
# Reload both intermediate results from the files written by the previous sections.
supports_dict = load_model('supports.pkl')
labels_dict = load_model('labels.pkl')

In [21]:
from itertools import product

def target_hits_f(args):
    """Worker for ``run_parallel``: target-hit statistics for one parameter combination.

    ``args`` is ``(pair, window_size, target, stop_loss, _return)``. The labels
    for ``(pair, window_size, target, stop_loss)`` are attached to a copy of the
    matching supports frame and passed to ``target_hits`` with ``_return``.

    Returns the five keys followed by ``hits`` and ``hits_rel``.

    Reads only ``supports_dict`` and ``labels_dict``. The previous unused
    ``data_dict[pair]`` lookup was dropped, so this step no longer needs the raw
    chart data in memory.
    """
    pair, window_size, target, stop_loss, _return = args
    # Copy so attaching the label column does not mutate the shared supports frame.
    supports = supports_dict[pair][window_size].copy()
    supports['label'] = labels_dict[pair][window_size][target][stop_loss]
    hits, hits_rel = target_hits(supports, _return)

    return pair, window_size, target, stop_loss, _return, hits, hits_rel

# One task per (pair, window_size, target, stop_loss, return threshold).
args = product(train_coins, window_sizes, targets, stop_losses, returns)
# Rows arrive in completion order (run_parallel uses imap_unordered), not grid order.
results = run_parallel(target_hits_f, args)
columns = ['pair', 'window_size', 'target', 'stop_loss', '_return', 'target_hits', 'target_hits_rel']
result_df = pd.DataFrame(results, columns=columns)

# Persist the result grid via write_tsv under <dropbox_dir>/result/.
rpath = os.path.join(dropbox_dir,'result', 'base-strategy-learn.tsv')
write_tsv(result_df, rpath)

HBox(children=(IntProgress(value=0, max=312000), HTML(value='')))



# Tmp: earliest available date per pair

In [13]:
# Scratch check: smallest value in the 'date' column for BTC_LTC.
data_dict['BTC_LTC']['date'].min()

'2015-01-01 00:00:00'

In [18]:
# Smallest 'date' value per pair, built in one pass from a list of records.
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every
# iteration. The resulting index is now 0..n-1 instead of all zeros, and an
# empty data_dict yields an empty frame with the right columns instead of an error.
df = pd.DataFrame(
    [[pair, data['date'].min()] for pair, data in data_dict.items()],
    columns=['pair', 'min_date'],
)

In [20]:
# Show pairs ordered by min_date, largest value first.
df.sort_values('min_date', ascending=False)

Unnamed: 0,pair,min_date
0,BTC_FOAM,2018-12-11 01:00:00
0,BTC_BCHABC,2018-11-08 01:00:00
0,BTC_BCHSV,2018-11-08 00:30:00
0,BTC_MANA,2018-10-12 16:00:00
0,BTC_BAT,2018-08-22 16:00:00
0,BTC_KNC,2018-08-15 16:00:00
0,BTC_STORJ,2017-10-27 02:00:00
0,BTC_GAS,2017-09-14 18:30:00
0,BTC_OMG,2017-09-12 02:30:00
0,BTC_GNT,2017-02-18 03:30:00
