In [36]:
import pandas as pd
import numpy as np
import json


import os
import subprocess
import threading

import requests

import time
import datetime

import ta

import pytz

from arctic import Arctic, TICK_STORE
from arctic.date import DateRange

store = Arctic('localhost')

if store.library_exists('daddy') == False:
    store.initialize_library('daddy', lib_type=TICK_STORE)

library = store['daddy']
library._chunk_size = 500000

def get_data(url, index, proxy):    
    global results
    global threads
        
    if proxy == None:
        res = requests.get(url, timeout=2)
    else:
        proxies = {
          "http": "http://" + proxy,
          "https": "https://" + proxy,
        }
        res = requests.get(url, proxies=proxies, timeout=2)
        
    results[index] = pd.DataFrame(json.loads(res.text))

def get_df(start_time, proxy=None, total_range=30):
    global threads
    global results
    
    start_time = pd.to_datetime(start_time).tz_localize(None)
    
    if start_time.date() == datetime.datetime.utcnow().date():
        urls = ["https://www.bitmex.com/api/v1/trade?symbol=XBTUSD&count={}&start={}&reverse=false&startTime={}".format(1000, i * 1000, start_time) for i in range(total_range)]
    else:
        urls = ["https://www.bitmex.com/api/v1/trade?symbol=XBTUSD&count={}&start={}&reverse=false&startTime={}&endTime={}".format(1000, i * 1000, start_time, pd.to_datetime(start_time.date() + pd.Timedelta(days=1))) for i in range(total_range)]
    
    threads = [None] * len(urls)
    results = [None] * len(urls)
    
    for i in range(len(threads)):
        threads[i] = threading.Thread(target=get_data, args=(urls[i], i, proxy))
        threads[i].start()
    
    for i in range(len(threads)):
        threads[i].join()

    df = pd.DataFrame()

    for curr_df in results:
        df = df.append(curr_df, ignore_index=True)
                    
    return df

def manual_scrape(scrape_from, sleep=True):
    print("Manual scrape for {}".format(scrape_from))
    proxy_df = pd.read_csv('proxies', sep=':', header=None)
    proxy_df.columns = ['proxy', 'port', 'username', 'password']

    proxy_df['proxy_string'] =  proxy_df['username'] + ":" + proxy_df['password'] + "@" + proxy_df['proxy'] + ":" + proxy_df['port'].astype(str)
    proxy_list = list(proxy_df['proxy_string'])
    at_once = len(proxy_list) + 1
    all_df = pd.DataFrame()
    completed = False
    
    while True:
        start_time = time.time()
        
        for i in range(at_once):
            if i == 0:
                curr_df = get_df(scrape_from)
            else:
                curr_df = get_df(scrape_from, proxy=proxy_list[i-1])
                
            all_df = all_df.append(curr_df, ignore_index=True)
            all_df = all_df.dropna(subset=['timestamp'], how='all')
            
            scrape_from = all_df.iloc[-1]['timestamp']
            print("Got {} data till {}".format(len(curr_df), scrape_from))
            
            if len(curr_df) < 1000:
                completed = True
                break
         
        total_time_taken = time.time() - start_time
        
        to_sleep = int(60 - total_time_taken) + 1
        
        if completed == True:
            break

        if to_sleep > 0:
            if sleep == True:
                print("Sleeping {} seconds".format(to_sleep))
                time.sleep(to_sleep)
        else:
            print("No need to sleep")
            
    
    all_df['timestamp'] = pd.to_datetime(all_df['timestamp'])
    all_df['timestamp'] = all_df['timestamp'].dt.tz_localize(None)
    all_df = all_df.sort_values('timestamp').reset_index(drop=True)
            
    return all_df

def aws_scrape(name):
    print("AWS Scrape for {}".format(name))
    url = "https://s3-eu-west-1.amazonaws.com/public.bitmex.com/data/trade/{}".format(name)
    r = requests.get(url)
    
    with open('temp', 'wb') as f:
        f.write(r.content)
        
    df = pd.read_csv('temp', compression='gzip')
    os.remove('temp')
    aws_df = df[df['symbol'] == 'XBTUSD']
    aws_df['timestamp'] = pd.to_datetime(aws_df['timestamp'], format="%Y-%m-%dD%H:%M:%S.%f")
    aws_df = aws_df.sort_values('timestamp').reset_index(drop=True)
    return aws_df

def get_bitmex_data(start, end, sleep=True):
    all_df = []

    for scrape_date in pd.date_range(start, end):
        if scrape_date.date() == datetime.datetime.utcnow().date() - pd.Timedelta(days=1):
            curr_time = datetime.datetime.utcnow()
            if curr_time.time() > datetime.time(5,41):
                df = aws_scrape(scrape_date.strftime("%Y%m%d.csv.gz"))
            else:
                df = manual_scrape(scrape_date, sleep=sleep)
        elif scrape_date.date() == datetime.datetime.utcnow().date():
            df = manual_scrape(scrape_date,  sleep=sleep)
        else:
            df = aws_scrape(scrape_date.strftime("%Y%m%d.csv.gz"))


        all_df.append(df)
    
    return pd.concat(all_df, axis=0)

In [37]:
def update_trades():
    end = pd.to_datetime(datetime.datetime.utcnow()).date()
    original_start = end - pd.Timedelta(days=20)
    
    try:
        start = pd.to_datetime(library.max_date('trades').astimezone(pytz.UTC)).tz_localize(None)
        
        if start.hour == 23 and start.minute >= 58:
            start = pd.to_datetime(start.date() + pd.Timedelta(days=1))
    except:
        start = original_start

    while True:
        try:
            end = pd.to_datetime(datetime.datetime.utcnow())

            print("{} to {}".format(start, end))
            df = get_bitmex_data(start, end)
            df = df[['timestamp', 'symbol', 'side', 'size', 'price', 'homeNotional', 'foreignNotional']]
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.set_index('timestamp')
            df = df.tz_localize(tz='UTC')
            library.write('trades', df)               
            break
        except Exception as e:
            print("Exception: {}. Retrying in 5 secs".format(str(e)))
            time.sleep(5)
    


In [38]:
update_trades()

2021-01-19 14:06:02.445000 to 2021-01-19 14:17:19.697773
Manual scrape for 2021-01-19 14:06:02.445000
Got 2618 data till 2021-01-19T14:17:19.540Z


NB treating all values as 'exists' - no longer sparse


Got 9 data till 2021-01-19T14:17:22.222Z


In [39]:
def get_significant_traders(df):
    df = df[['timestamp', 'side', 'homeNotional', 'foreignNotional']]
    df = df.groupby(['timestamp', 'side']).sum() 
    df = df.reset_index()
    df = df[df['foreignNotional'] > 500]
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['price'] = df['foreignNotional']/df['homeNotional']
    df = df.sort_values('timestamp')
    df = df.drop_duplicates()
    return df

def get_features(curr_df):
    ser = {}
    curr_df = curr_df.sort_values('timestamp')
    
    if len(curr_df) > 0:
        ser['open'] = curr_df.iloc[0]['price']
        ser['high'] = curr_df['price'].max()
        ser['low'] = curr_df['price'].min()
        ser['close'] = curr_df.iloc[-1]['price']
        ser['volume'] = curr_df['foreignNotional'].sum()
    else:
        ser['open'] = np.nan
        ser['high'] = np.nan
        ser['low'] = np.nan
        ser['close'] = np.nan
        ser['volume'] = np.nan
        
    buy_orders = curr_df[curr_df['side'] == 'Buy']
    sell_orders = curr_df[curr_df['side'] == 'Sell']

    total_buy = buy_orders['homeNotional'].sum()
    total_sell = sell_orders['homeNotional'].sum()
    total = total_buy + total_sell

    ser['buy_percentage'] = total_buy/total
    ser['buy_volume'] = total_buy
    ser['all_volume'] = total
    
    readable_bins = []
    

    readable_bins = [0, 2, 10, np.inf]
        
    readable_labels = ['small', 'medium', 'large']
    curr_df['new_range'] = pd.cut(curr_df['homeNotional'], readable_bins, include_lowest=True, labels=readable_labels).astype(str)
    
        
    for curr_range in set(readable_labels):
        group = curr_df[curr_df['new_range'] == curr_range]
        ser["percentage_{}".format(curr_range)] = group['homeNotional'].sum()/total
        buy_orders = group[group['side'] == 'Buy']
        ser['buy_percentage_{}'.format(curr_range)] = (buy_orders['homeNotional'].sum())/group['homeNotional'].sum()

    
        
    return pd.Series(ser)

def get_features_from_sig(df):
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    minute_only = df['timestamp'].dt.minute.astype(str)
    minute_only_two = minute_only.apply(lambda x: str(x)[1:]) #there is a mistake here.
    df = df[~((minute_only == '9') | (minute_only_two == '9') | (minute_only == '8')  | (minute_only_two == '8'))]

    features = df.groupby(pd.Grouper(key='timestamp', freq="10Min", label='left')).apply(get_features)
    features = features.reset_index()

    features['timestamp'] = pd.to_datetime(features['timestamp'])
    features = features.drop_duplicates(subset=['timestamp'])
    features = features.sort_values('timestamp')
    features = features.dropna()
    return features

In [40]:
def get_intervaled_date(startTime):
    time_df = pd.DataFrame(pd.Series({'Time': startTime})).T
    return time_df.groupby(pd.Grouper(key='Time', freq="10Min", label='left')).sum().index[0]

In [41]:
last_date = pd.to_datetime(library.max_date('trades').astimezone(pytz.UTC)).tz_localize(None)

In [42]:
last_date

Timestamp('2021-01-19 14:17:22.222000')

In [43]:

minute = str(last_date.time().minute)

if len(minute) == 1:
    minute_only = int(minute)
else:
    minute_only = int(minute[1:])
    
if (minute_only < 8):
    have_till_calc = last_date - pd.Timedelta(minutes=10)
else:
    have_till_calc = last_date

In [44]:
have_till = get_intervaled_date(have_till_calc)

In [45]:
have_till

Timestamp('2021-01-19 14:00:00', freq='10T')

In [46]:
min_date = pd.to_datetime(library.min_date('trades').astimezone(pytz.UTC)).tz_localize(None)
startTime = get_intervaled_date(min_date)

if os.path.isfile('data/features.csv'):
    startTime = pd.to_datetime(pd.read_csv('data/features.csv').iloc[-1]['timestamp']) + pd.Timedelta(minutes=10)



In [47]:
startTime

Timestamp('2021-01-19 14:00:00')

In [48]:
have_till = have_till.tz_localize(tz='UTC')

In [49]:
startTime = startTime.tz_localize(tz='UTC')

In [50]:
#there is some problem with appending features. discover and fix

In [51]:
if have_till + pd.Timedelta(minutes=10) != startTime:
    df = library.read('trades', date_range = DateRange(start=startTime, end=have_till + pd.Timedelta(minutes=10)))
    df = df.tz_convert('UTC').tz_localize(None)
    df = df.reset_index()

    df = df.rename(columns={'index': 'timestamp'})
    
    #calculate and save features
    df = get_significant_traders(df)
    features = get_features_from_sig(df)

    features['change'] = ((features['close'] - features['open'])/features['open']) * 100
    features = features[['timestamp', 'open', 'high', 'low', 'close', 'volume', 'change', 'percentage_large', 'buy_percentage_large']]
    
    if os.path.isfile('data/features.csv'):
        old_features = pd.read_csv('data/features.csv')
        old_features['timestamp'] = pd.to_datetime(old_features['timestamp'])
        features = pd.concat([old_features, features])
        features = features.drop_duplicates(subset=['timestamp']).reset_index(drop=True)

    features['macd'] = ta.trend.macd_signal(features['close'])
    features['rsi'] = ta.momentum.rsi(features['close'])
    
    features.to_csv('data/features.csv', index=None)

In [52]:
features = pd.read_csv('data/features.csv')
features['timestamp'] = pd.to_datetime(features['timestamp'])
dupe = features.iloc[-1]
dupe['timestamp'] = dupe['timestamp'] + pd.Timedelta(minutes=10)
features = features.append(dupe, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dupe['timestamp'] = dupe['timestamp'] + pd.Timedelta(minutes=10)


In [53]:
from scipy.ndimage import gaussian_filter
import requests

In [54]:
def update_price():
    start_time = "2020-01-01"

    if os.path.isfile("data/btc_daily.csv"):
        start_time = pd.read_csv('data/btc_daily.csv').iloc[-1]['timestamp']

    if (pd.to_datetime(start_time).date() < pd.Timestamp.utcnow().date()):
        try:
            new_url = 'https://www.bitmex.com/api/v1/trade/bucketed?binSize=1d&partial=false&symbol=XBTUSD&count=500&reverse=false&startTime={}'.format(start_time)
            res = requests.get(new_url)
            price_df = pd.DataFrame(json.loads(res.text))
            price_df['timestamp'] = pd.to_datetime(price_df['timestamp'])
            price_df = price_df.set_index('timestamp').tz_localize(None).reset_index()


            if os.path.isfile("data/btc_daily.csv"):
                old_df = pd.read_csv("data/btc_daily.csv")
                old_df['timestamp'] = pd.to_datetime(old_df['timestamp'])
                df = pd.concat([old_df, price_df])
                df = df.drop_duplicates(subset=['timestamp'])
                df.to_csv('data/btc_daily.csv', index=None)
            else:
                price_df.to_csv('data/btc_daily.csv', index=None)
        except Exception as e:
            print("Exception in parameter performer: {}".format(str(e)))
    else:
        pass

In [55]:
#look at file and update if required
def get_trends():
    update_price()
    df = pd.read_csv("data/btc_daily.csv")
    df = df[['timestamp', 'close']]
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df["30D_volatility"] = df['close'].rolling(30).std()/10
    df['30D_volatility'] = df['30D_volatility'].fillna(method='bfill').fillna(method='ffill')

    gaussian_vols = []

    for idx, row in df.iterrows():
        gaussian_vols.append(gaussian_filter(df[:idx+1]['30D_volatility'], 3.)[-1])

    df['30D_volatility'] = gaussian_vols
    
    price_df = df.copy()
    curr_group = ""
    new_price_df = pd.DataFrame()

    for i in range(1, len(price_df)):
        row = price_df.iloc[i]
        curr_vol = price_df.iloc[i]['30D_volatility']
        prev_vol = price_df.iloc[i-1]['30D_volatility']
        three_vol = price_df.iloc[i-2]['30D_volatility']

        if pd.isnull(prev_vol) == False:
            if curr_group == "":
                curr_group = price_df.iloc[i]['timestamp']


            if (three_vol - prev_vol) * (prev_vol - curr_vol) < 0:
                curr_group = price_df.iloc[i]['timestamp']



            row['curr_group'] = curr_group
            new_price_df = new_price_df.append(row, ignore_index=True)
            
    return new_price_df

In [56]:
trends = get_trends()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row['curr_group'] = curr_group
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [57]:
curr_group = trends.iloc[-1]['curr_group'].date()

In [58]:
last_date = features.iloc[-1]['timestamp'].date()

In [59]:
if last_date.day - curr_group.day < 4:
    curr_group = last_date - pd.Timedelta(days=4)

In [60]:
curr_group =pd.to_datetime(curr_group)

In [61]:
features = features[features['timestamp'] >= curr_group]

In [62]:
from algos.daddy.backtest import perform_backtest

In [63]:
parameters = json.load(open('algos/daddy/parameters.json'))

In [64]:
run = perform_backtest(features, parameters)

In [65]:
run[0].get_logs()

(                   Date       Value
 0   2021-01-15 00:00:00  500.000000
 1   2021-01-15 00:10:00  500.000000
 2   2021-01-15 00:30:00  500.000000
 3   2021-01-15 00:40:00  500.000000
 4   2021-01-15 01:00:00  500.000000
 ..                  ...         ...
 574 2021-01-19 13:30:00  496.842656
 575 2021-01-19 13:40:00  498.976469
 576 2021-01-19 13:50:00  498.976469
 577 2021-01-19 14:00:00  498.619514
 578 2021-01-19 14:10:00  498.619514
 
 [579 rows x 2 columns],
                  Date Type         Price  Total Spent  Comission
 0 2021-01-19 04:20:00  BUY  36662.390029   474.897104   7.123457,
 Empty DataFrame
 Columns: [Profit, Date, Value, original_value, pct_change]
 Index: [],
 0)

In [66]:
analysis = run[0].analyzers.getbyname('tradeanalyzer').get_analysis()

In [67]:
analysis

AutoOrderedDict([('total', AutoOrderedDict([('total', 1), ('open', 1)]))])

In [None]:
#run at start. Then every minute

In [None]:
#run_backtest and verify after 8th minute

In [None]:
if 'open' in analysis['total']:
    if analysis['total']['open'] == 1 and current_pos == "NONE":
        print("Opened position from backtest_verification")
        lt.fill_order('buy')
    elif analysis['total']['open'] == 0 and current_pos == "OPEN":
        print("Closed position from backtest_verification")
        # lt.close_stop_order()
        lt.fill_order('sell')


In [9]:
#run update_trade every 7th minute and at start

In [11]:
#after done, run update trade again and backtest verify