In [4]:
import os
import sys
import pickle
import pandas as pd
import numpy as np
import talib as ta
from datetime import datetime, timedelta
#%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import Image, display_png
os.chdir('/home/shun/PycharmProjects/crypto-onibot')

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', 1000)

In [206]:
def generate_ticker(executions):
    '''
    約定履歴から1秒ごとのtickerを作成
    約定履歴: timestamp, side, price, amount
    ticker: timestamp, bid, bid_volume, ask, ask_volume, open, high, low, close, volume
    '''
    executions.index = pd.to_datetime(executions.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))

    executions = executions.resample('S').last()
    
    # bid, askを算出
    bids = executions.loc[executions.side == 'Sell'].resample('S').ffill()
    bids = bids.drop(['timestamp', 'side'], axis=1)
    asks = executions.loc[executions.side == 'Buy'].resample('S').ffill()
    asks = asks.drop(['timestamp', 'side'], axis=1)
    bids_asks = pd.merge(bids, asks, how='inner', left_index=True, right_index=True)
    bids_asks.columns = ['bid', 'bid_volume', 'ask', 'ask_volume']
    # OHLCVを算出   
    ohlcv = bids.price.resample('S').ohlc()
    ohlcv['volume'] = bids.amount.resample('S').sum()
    ohlcv['timestamp'] = pd.Series(ohlcv.index).apply(lambda x: datetime.timestamp(x) * 1000).values    
    # ticker作成
    ticker = pd.merge(bids_asks, ohlcv, how='inner', left_index=True, right_index=True)
    ticker['timestamp'] = pd.Series(ticker.index).apply(lambda x: datetime.timestamp(x) * 1000).values
    return ticker.reset_index(drop=True).ix[:, ['timestamp', 'bid', 'bid_volume', 'ask', 'ask_volume', 'open', 'high', 'low', 'close', 'volume']]

def generate_ohlcv(executions, candle_type):
    '''
    約定履歴から１分、５分、１時間のOHLCVを作成
    約定履歴: timestamp, side, price, amount
    OHLCV: timestamp, bid, bid_volume, ask, ask_volume, open, high, low, close, volume
    '''        
    executions.index = pd.to_datetime(executions.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))

    # 1秒ごとにグルーピング
    executions = executions.resample('S').last()

    # bid, askを算出
    bids = executions.loc[executions.side == 'Sell'].resample('S').ffill()
    
    # OHLCVを作成
    if candle_type == '1s':
        freq = 'S'
    if candle_type == '1m':
        freq = 'T'
    elif candle_type == '5m':
        freq = '5T'
    elif candle_type == '1h':
        freq = 'H'
    
    ohlcv = bids.price.resample(freq).ohlc()
    ohlcv['volume'] = bids.amount.resample(freq).sum()
    ohlcv['timestamp'] = pd.Series(ohlcv.index).apply(lambda x: datetime.timestamp(x) * 1000).values
    return ohlcv

def generate_active_ohlcv(df, candle_type):
    '''
    1秒足から未確定足を含むOHLCVを作成
    df: DataFrame (index: datetimeindex, column: open, high, low, close, volume timestamp)
    candle_type: '1m' or '5m' or '1h'
    '''
    
    ohlcv = df.values
    open = ohlcv[0, 0]
    high = -1
    low = 999999
    close = ohlcv[0, 3]
    volume = ohlcv[0, 4]

    if candle_type == '1m':
        div = 60 * 1000
    elif candle_type == '5m':
        div = 60 * 5 * 1000
    elif candle_type == '1h':
        div = 60 * 60 * 1000
    
    active_ohlcv = []   
    for i in range(ohlcv.shape[0]):    
        row = ohlcv[i]

        # １分ごとにリセット
        timestamp = row[5]
        if timestamp % div == 0:
            open = row[0]
            high = -1
            low = 999999
            volume = 0

        close = row[3]
        volume += row[4]
        if row[1] > high:
            high = row[1]
        if row[2] < low:
            low = row[2]
        active_ohlcv.append([open, high, low, close, volume, timestamp])
        
    active_ohlcv = pd.DataFrame(active_ohlcv)
    active_ohlcv.columns = ['open', 'high', 'low', 'close', 'volume', 'timestamp']
    active_ohlcv.index = pd.to_datetime(active_ohlcv.timestamp.apply(lambda x: datetime.fromtimestamp(x / 1000)))
    return active_ohlcv

def generate_orderbook(executions):
    executions.index = pd.to_datetime(executions.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))

    # 1秒ごとにグルーピング（nullは前回の値で補完）
    orderbook = executions.resample('S').ffill()
    orderbook['timestamp'] = pd.Series(orderbook.index).apply(lambda x: datetime.timestamp(x) * 1000).values

    # 指定範囲のデータを取り出す
    orderbook = orderbook.loc[(start_dt <= orderbook.index) & (orderbook.index < end_dt)]

In [84]:
import os
import fnmatch
from common.utils import str2dt, format_dt

def read_csvs(dirpath, start_dt, end_dt, pattern=None, prefix=None):
    files = sorted(os.listdir(dirpath))

    if pattern is not None:
        files = [file for file in files if fnmatch.fnmatch(file, pattern)]
    
    ret = None
    for file in files:
        if prefix is None:            
            dt = datetime.strptime(file, '%Y%m%d%H%M%S')
        else:
            dt = datetime.strptime(file.replace(prefix, ''), '%Y%m%d%H%M%S')
        
        if (start_dt <= dt) and (dt <= end_dt):
            print('read file: {}'.format(file))
            df = pd.read_csv(os.path.join(dirpath, file), header=None)            
            
            if ret is None:
                ret = df
            else:
                ret = pd.concat([ret, df])
    return ret

In [216]:
#executions = pd.read_csv('collect/executions/201903250100', names=['timestamp', 'datetime_unix', 'side', 'price', 'amount', 'datetime_jst', 'datetime_jst'])
start_dt = str2dt('2019-04-06 19:00:00')
end_dt = str2dt('2019-04-06 21:00:00')
executions = read_csvs('collect/executions', start_dt, end_dt, pattern='execution.*', prefix='execution.')
executions.columns = ['timestamp', 'datetime_utc_bitmex', 'side', 'price', 'amount', 'datetime', 'datetime_jst_bitmex']
executions = executions.drop(['datetime', 'datetime_utc_bitmex', 'datetime_jst_bitmex'], axis=1)
# 後で消す
executions['side'] = executions['side'].str.replace(' ', '')

ticker = generate_ticker(executions)
ohlcv = generate_ohlcv(executions, candle_type='1s')
active_ohlcv_1m = generate_active_ohlcv(ohlcv, candle_type='1m')
active_ohlcv_5m = generate_active_ohlcv(ohlcv, candle_type='5m')
active_ohlcv_1h = generate_active_ohlcv(ohlcv, candle_type='1h')

read file: execution.20190406193436
read file: execution.20190406203441


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


In [121]:
header = ['timestamp']
for i in range(25):
    for col in ['amount', 'price']:
        header.append(col + '{0:02d}'.format(i))
        
bids = read_csvs('collect/orderbook/bids', start_dt, end_dt, pattern='bid.*', prefix='bid.')
bids.columns = header
bids.index = pd.to_datetime(bids.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))
bids = bids.resample('S').last()

asks = read_csvs('collect/orderbook/asks', start_dt, end_dt, pattern='ask.*', prefix='ask.')
asks.columns = header
asks.index = pd.to_datetime(asks.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))
asks = asks.resample('S').last()

read file: bid.20190406193631
read file: bid.20190406203631


(7196, 51)

In [98]:
inago = pd.read_csv('collect/inago/inago.csv', sep='\t', names=['id', 'board_name', 'taker_side', 'volume', 'last_price', 'pair_currency', 'from_unix_time', 'to_unix_time', 'from_datetime', 'to_datetime', 'timestamp'])
inago.index = pd.to_datetime(inago.timestamp.apply(lambda x: datetime.fromtimestamp(x/1000)))

# 1秒ごとにサンプリング
# freq format: https://stackoverflow.com/questions/35339139/where-is-the-documentation-on-pandas-freq-tags
inago = inago.resample('S').last()
inago['timestamp'] = pd.Series(inago.index).apply(lambda x: datetime.timestamp(x) * 1000).values
# 指定範囲のデータを取り出す
inago = inago.loc[(start_dt <= inago.index) & (inago.index < end_dt)]

In [99]:
inago

Unnamed: 0_level_0,id,board_name,taker_side,volume,last_price,pair_currency,from_unix_time,to_unix_time,from_datetime,to_datetime,timestamp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-04-06 19:00:00,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:01,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:02,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:03,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:04,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:05,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:06,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:07,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:08,,,,,,,,,,,1.554545e+12
2019-04-06 19:00:09,,,,,,,,,,,1.554545e+12


In [None]:
ticker.to_csv('collect/format_data/ticker.csv', index=False)
ohlcv_1m.to_csv('collect/format_data/ohlcv_1m.csv', index=False)
ohlcv_5m.to_csv('collect/format_data/ohlcv_5m.csv', index=False)
ohlcv_1h.to_csv('collect/format_data/ohlcv_1h.csv', index=False)
orderbook.to_csv('collect/format_data/orderbook.csv', index=False)
inago.to_csv('collect/format_data/inago.csv', index=False)

# API調査

In [None]:
from trade_tools.my_api import API

EXCHANGE_NAME = 'bitmex'
PAIR = 'BTC/USD'
CANDLE_TYPE = '1m'
MAX_WAIT_TIME = 300
API = API(EXCHANGE_NAME)

In [None]:
tmp = orderbook.iloc[0]

bids = []
asks = []
for i in range(25):
    i_bid = 25 + i
    bid = tmp['price' + '{0:02d}'.format(i_bid)]
    bid_volume = tmp['amount' + '{0:02d}'.format(i_bid)]
    bids.append([bid, bid_volume])

    i_ask = 24 - i
    ask = tmp['price' + '{0:02d}'.format(i_ask)]
    ask_volume = tmp['amount' + '{0:02d}'.format(i_ask)]
    asks.append([ask, ask_volume])
formatted_orderbook = {'bids': bids, 'asks': asks, 'timestamp': tmp.timestamp}

In [None]:
dict(ticker.iloc[0])

In [None]:
orderbook.head()

In [None]:
API.fetch_ticker(PAIR)

In [None]:
datetime.fromtimestamp(datetime.now().timestamp() - 3600)

In [None]:
ohlcv_1m.head()

In [None]:
now = datetime.now()

In [None]:
now_timestamp = now.timestamp() * 1000

In [None]:
now

In [None]:
datetime.fromtimestamp(1553151600)

In [None]:
inago[~inago.id.isnull()]

In [None]:
orderbook1.head()

In [None]:
orderbook0.loc[orderbook0.side42 == 'Sell', ['side42', 'price42']]

In [None]:
orderbook0.iloc[4850:4860]