In [3]:
import pandas as pd

# Config

In [4]:
import yaml


config_path = '/Users/ab011sw/config/binance.yaml'
API_NAME = 'history-download'
with open(config_path, 'r') as stream:
    api_config = yaml.safe_load(stream)[API_NAME]

# Client

In [5]:
from binance.client import Client

client = Client(**api_config)

# Symbols

In [6]:
info = client.get_exchange_info()
pairs = [symbol_info['symbol'] for symbol_info in info['symbols']]

In [7]:
[p for p in pairs if 'BTC' in p and 'USD' in p]

['BTCUSDT',
 'BTCTUSD',
 'TUSDBTC',
 'BTCUSDC',
 'BTCUSDS',
 'BTCBUSD',
 'BTCUPUSDT',
 'BTCDOWNUSDT',
 'SUSDBTC']

In [8]:
[i for i in info['symbols'] if i['symbol'] == 'BTCUSDT'][0]

{'symbol': 'BTCUSDT',
 'status': 'TRADING',
 'baseAsset': 'BTC',
 'baseAssetPrecision': 8,
 'quoteAsset': 'USDT',
 'quotePrecision': 8,
 'quoteAssetPrecision': 8,
 'baseCommissionPrecision': 8,
 'quoteCommissionPrecision': 8,
 'orderTypes': ['LIMIT',
  'LIMIT_MAKER',
  'MARKET',
  'STOP_LOSS_LIMIT',
  'TAKE_PROFIT_LIMIT'],
 'icebergAllowed': True,
 'ocoAllowed': True,
 'quoteOrderQtyMarketAllowed': True,
 'isSpotTradingAllowed': True,
 'isMarginTradingAllowed': True,
 'filters': [{'filterType': 'PRICE_FILTER',
   'minPrice': '0.01000000',
   'maxPrice': '1000000.00000000',
   'tickSize': '0.01000000'},
  {'filterType': 'PERCENT_PRICE',
   'multiplierUp': '5',
   'multiplierDown': '0.2',
   'avgPriceMins': 5},
  {'filterType': 'LOT_SIZE',
   'minQty': '0.00000100',
   'maxQty': '9000.00000000',
   'stepSize': '0.00000100'},
  {'filterType': 'MIN_NOTIONAL',
   'minNotional': '10.00000000',
   'applyToMarket': True,
   'avgPriceMins': 5},
  {'filterType': 'ICEBERG_PARTS', 'limit': 10},
  

# Data

columns from: https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data

In [9]:
pair = "BTCUSDT"

klines = client.get_historical_klines(pair, Client.KLINE_INTERVAL_1MINUTE, "24 hours ago UTC")

In [10]:
columns =  [
    "Open time",
    "Open",
    "High",
    "Low",
    "Close",
    "Volume",
    "Close time",
    "Quote asset volume",
    "Number of trades",
    "Taker buy base asset volume",
    "Taker buy quote asset volume",
    "Ignore"
]

In [11]:
candles = pd.DataFrame(klines, columns=columns)

In [12]:
candles

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,Ignore
0,1607976720000,19190.65000000,19199.00000000,19190.64000000,19194.66000000,34.22014400,1607976779999,656937.30010244,415,18.21353400,349652.86203602,0
1,1607976780000,19194.67000000,19194.67000000,19189.39000000,19190.24000000,17.85175200,1607976839999,342635.02880755,405,3.47120900,66621.23558397,0
2,1607976840000,19190.25000000,19190.97000000,19180.54000000,19181.42000000,25.54533900,1607976899999,490172.89858151,489,7.85839900,150782.74053592,0
3,1607976900000,19181.42000000,19185.33000000,19181.42000000,19181.86000000,12.69451600,1607976959999,243524.09647032,451,6.86488200,131687.83947310,0
4,1607976960000,19181.85000000,19185.33000000,19181.42000000,19183.22000000,10.65282000,1607977019999,204353.98535389,389,7.19150700,137955.62996196,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1435,1608062820000,19530.68000000,19531.47000000,19521.58000000,19525.65000000,13.68927600,1608062879999,267310.43082498,430,4.17368800,81495.49252720,0
1436,1608062880000,19525.64000000,19525.65000000,19510.55000000,19514.13000000,21.85656700,1608062939999,426574.66314004,453,5.33785300,104181.16605277,0
1437,1608062940000,19514.12000000,19519.94000000,19511.86000000,19513.78000000,56.64855800,1608062999999,1105548.83445669,558,48.69084600,950252.32052802,0
1438,1608063000000,19513.79000000,19517.00000000,19508.23000000,19508.66000000,32.38716800,1608063059999,631988.19940126,543,8.78262500,171381.03762719,0


# Download History

In [27]:
def minutes_between(date_1, date_2):
    time_delta = (date_2 - date_1)
    total_seconds = time_delta.total_seconds()
    return int(total_seconds/60)

In [2]:
from pathlib import Path

root = '/Users/ab011sw/data/binance/raw/'

def save_bulk(bulk):
    df = pd.DataFrame(bulk, columns=columns)
    min_ts, max_ts = df['Open time'].min(), df['Open time'].max()
    _dir = f'{root}/pair={pair}'
    Path(_dir).mkdir(parents=True, exist_ok=True)
    df.to_parquet(f'{_dir}/{min_ts}_{max_ts}')

In [30]:
from itertools import count
from tqdm import tqdm
import datetime as dt


date_from = dt.date(2016, 1, 1)
date_from_str = date_from.strftime("%Y-%m-%d")
date_to = dt.datetime.now()

bulk_size = 44000

bulk = []
binance_generator = client.get_historical_klines_generator(pair, Client.KLINE_INTERVAL_1MINUTE, date_from_str)
kline = next(binance_generator)

first_occurence = dt.datetime.fromtimestamp(kline[0]/1000)
minutes = minutes_between(first_occurence, date_to)

for i, kline in tqdm(zip(count(), binance_generator), total=minutes):
    bulk.append(kline)
    if i % bulk_size == 0 and i > 0:
        save_bulk(bulk)
        bulk = []

100%|█████████▉| 1744874/1751966 [46:21<00:11, 627.41it/s] 


# Test data

In [46]:
data = pd.read_parquet(f'{root}/pair={pair}')\
    .drop_duplicates()\
    .sort_values('Open time')
data['open_dt'] = (data['Open time'] / 1000).apply(dt.datetime.fromtimestamp)
data['close_dt'] = (data['Close time'] / 1000).apply(dt.datetime.fromtimestamp)

In [47]:
open_to_close = data['Close time'] - data['Open time']
open_to_close.value_counts()

59999    1715989
999            1
25551          1
60000          1
46694          1
32286          1
14838          1
14788          1
20798          1
13524          1
45210          1
21646          1
13419          1
dtype: int64

In [48]:
data[open_to_close != 59999][['open_dt', 'close_dt']]

Unnamed: 0,open_dt,close_dt
29518,2017-09-06 17:59:00.000,2017-09-06 18:00:00.000
156659,2017-12-04 07:00:00.000,2017-12-04 07:00:20.798
177060,2017-12-18 11:00:20.799,2017-12-18 11:00:42.445
177196,2017-12-18 13:29:00.000,2017-12-18 13:29:13.419
201043,2018-01-04 04:00:00.000,2018-01-04 04:00:14.838
251166,2018-02-08 01:28:00.000,2018-02-08 01:28:14.788
252367,2018-02-10 06:59:14.789,2018-02-10 06:59:59.999
253673,2018-02-11 05:00:00.000,2018-02-11 05:00:00.999
458636,2018-07-04 02:22:00.000,2018-07-04 02:22:25.551
944560,2019-06-07 23:13:00.000,2019-06-07 23:13:13.524


In [53]:
shifted = data['Open time'] - data['Open time'].shift(1)
shifted.value_counts(dropna=False)

60000.0        1715975
25260000.0           2
36060000.0           2
12660000.0           2
28860000.0           1
120674789.0          1
3660000.0            1
3720000.0            1
240000.0             1
3900000.0            1
20799.0              1
6360000.0            1
7260000.0            1
7740000.0            1
7560000.0            1
27480000.0           1
8460000.0            1
9060000.0            1
819201.0             1
945211.0             1
21300000.0           1
21660000.0           1
NaN                  1
2100000.0            1
Name: Open time, dtype: int64

In [51]:
data['shifted'] = shifted

In [55]:
watch_i = 29519
_range = 5
data.iloc[watch_i-_range:watch_i+_range, :][['open_dt', 'close_dt', 'shifted']]

Unnamed: 0,open_dt,close_dt,shifted
29514,2017-09-06 17:55:00,2017-09-06 17:55:59.999,60000.0
29515,2017-09-06 17:56:00,2017-09-06 17:56:59.999,60000.0
29516,2017-09-06 17:57:00,2017-09-06 17:57:59.999,60000.0
29517,2017-09-06 17:58:00,2017-09-06 17:58:59.999,60000.0
29518,2017-09-06 17:59:00,2017-09-06 18:00:00.000,60000.0
29519,2017-09-07 01:00:00,2017-09-07 01:00:59.999,25260000.0
29520,2017-09-07 01:01:00,2017-09-07 01:01:59.999,60000.0
29521,2017-09-07 01:02:00,2017-09-07 01:02:59.999,60000.0
29522,2017-09-07 01:03:00,2017-09-07 01:03:59.999,60000.0
29523,2017-09-07 01:04:00,2017-09-07 01:04:59.999,60000.0


In [50]:
data[shifted != 60000.0]

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,Ignore,open_dt,close_dt
0,1502942460000,4261.48,4261.48,4261.48,4261.48,0.0,1502942519999,0.0,0,0.0,0.0,7960.45262989,2017-08-17 06:01:00.000,2017-08-17 06:01:59.999
29519,1504738800000,4619.43,4619.64,4619.43,4619.64,2.226911,1504738859999,10287.31182534,3,2.226911,10287.31182534,11211.97366827,2017-09-07 01:00:00.000,2017-09-07 01:00:59.999
156660,1512367220799,11478.0,11478.0,11478.0,11478.0,0.0,1512367280798,0.0,0,0.0,0.0,37184.17978073,2017-12-04 07:00:20.799,2017-12-04 07:01:20.798
177061,1513592040000,18680.91,18680.95,18657.25,18680.95,0.276016,1513592099999,5152.68190498,15,0.103116,1926.30162748,77278.41320647,2017-12-18 11:14:00.000,2017-12-18 11:14:59.999
177197,1513604040000,19161.0,19201.0,18850.0,18850.04,40.169844,1513604099999,761528.22713681,223,10.310058,195818.86132983,77667.68813335,2017-12-18 14:34:00.000,2017-12-18 14:34:59.999
201044,1515042360000,15119.97,15119.97,14850.0,14875.34,57.675828,1515042419999,861968.00795446,287,23.577608,351168.62390702,0.0,2018-01-04 06:06:00.000,2018-01-04 06:06:59.999
251167,1518170354789,7789.9,8230.46,7789.9,8230.46,148.475418,1518170414788,1197234.89127694,715,121.867435,980151.79121367,0.0,2018-02-09 10:59:14.789,2018-02-09 11:00:14.788
252368,1518243300000,8842.98,8905.0,8842.98,8894.04,29.91588,1518243359999,265705.89478449,452,22.961683,203905.46838981,0.0,2018-02-10 07:15:00.000,2018-02-10 07:15:59.999
253674,1518323700000,7976.74,8098.0,7976.49,8077.23,135.34653,1518323759999,1090892.72458084,986,60.310939,485225.54144411,0.0,2018-02-11 05:35:00.000,2018-02-11 05:35:59.999
447919,1530014400000,6228.0,6228.0,6180.0,6190.0,73.507996,1530014459999,456168.43432408,322,26.035458,161551.45178713,0.0,2018-06-26 14:00:00.000,2018-06-26 14:00:59.999
