In [1]:
# -*- coding:utf-8 -*-
"""
直接下载bfx数据
"""
import os
import datetime
import time
import ccxt
import json
import pandas as pd
from pathlib import Path
from enum import Enum
from typing import List

In [2]:
def save_to_csv(data: List, dbpath, file_name):
    """
    存储csv
    """
    file_path = str(Path.joinpath(dbpath,f'{file_name}').resolve())
    df = pd.DataFrame(
        data=data,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
    )

    if os.path.exists(file_path):
        df.to_csv(file_path, index=None, mode='a', header=False)  # 保存文档，如果已存在，则不保存列名
    else:
        df.to_csv(file_path, index=None, mode='a', header=True) # 保存文档，如果不存在，则创建个新的，保留header

    return len(df)

In [3]:
def bfx2(key_path):
    """
    """
    with open(key_path, mode='r', encoding='utf-8') as f:
        settings = json.load(f)
        apikey = settings["datafeed.apiKey"]
        secret = settings["datafeed.apiSecret"]

    bfx_exchange = ccxt.bitfinex2(
        {
            'apiKey': apikey,
            'secret': secret,
            'timeout': 15000,
            'enableRateLimit': 4000,  # 统一交易所属性
        }
    )
    return bfx_exchange

In [4]:
def get_his_data(
        start_date,
        end_date,
        dbpath:Path = Path.cwd(),
        file_name: str = None,
        symbol: str = None,
        interval: str = None,
        limit: int = 3000
    ) -> None:
    key_path = Path.joinpath(dbpath,".vntrader","vt_setting.json")
    bfx = bfx2(key_path=key_path)

    if start_date is None:
        start_date = datetime.datetime.strptime('2023/04/20','%Y/%m/%d')
    if end_date is None:
        end_date = datetime.datetime.strptime('2024/04/20','%Y/%m/%d')

    # 下载数据
    since = int(time.mktime(start_date.timetuple()))*1000
    end = int(time.mktime(end_date.timetuple()))*1000
    data = []
    count = 0
    file_path = str(Path.joinpath(dbpath,f'{file_name}').resolve())
    if os.path.exists(file_path):
        with open(file_path,'r') as f:
            lines = f.readlines()
            last_index = int(lines[-1].split(',')[0])
            if last_index > since:
                since = int(last_index + 60*1000)

    while True:
        if end - since < 60*1000:
            count_tmp = save_to_csv(data=data,dbpath=dbpath,file_name=file_name)
            print(f'end-since = {end-since}')
            break
        data_tmp = bfx.fetch_ohlcv(symbol = symbol,timeframe = interval,since = since,limit = limit,params = {})
        time.sleep(6)
        data += data_tmp
        since = int(data_tmp[-1][0]+60*1000)
        if len(data) > 10000:
            count_tmp = save_to_csv(data=data,dbpath=dbpath,file_name=file_name)
            count += count_tmp
            data = []

In [3]:
import pandas as pd
import os
def preprocess_data(Data_path,symbol):
    path = os.path.join(Data_path,symbol,'1m')
    data_list = os.listdir(path=path)
    df = None
    
    print(os.path.join(path , data_list[100]))
    for i in range(len(data_list)-1):
        data_tmp = pd.read_csv(os.path.join(path , data_list[i]),compression='zip').iloc[:,:6]
        data_tmp.columns = ['timestamp','open','high','low','close','volume']
        if df is None:
            df = data_tmp
        else:
            df = pd.concat([df,data_tmp],ignore_index=True)
    print(df.head(),df.tail())
    df.to_csv('binance-public-data-master//binance-public-data-master//python//data' + '//' + symbol + '.csv')


In [18]:
data_path = 'binance-public-data-master//binance-public-data-master//python//data//futures//um//daily//klines'
preprocess_data(Data_path=data_path,symbol='ETHUSDT')

binance-public-data-master//binance-public-data-master//python//data//futures//um//daily//klines\ETHUSDT\1m\ETHUSDT-1m-2020-04-10.zip
       timestamp    open    high     low   close   volume
0  1577836860000  128.95  129.04  128.93  128.94  695.566
1  1577836920000  128.93  128.97  128.92  128.92  131.902
2  1577836980000  128.97  128.98  128.93  128.96  455.572
3  1577837040000  128.95  128.95  128.76  128.81  866.117
4  1577837100000  128.78  128.81  128.71  128.75  265.479             timestamp     open     high      low    close    volume
549698  1610841300000  1238.36  1239.92  1233.59  1234.08  1297.885
549699  1610841360000  1234.14  1237.55  1233.52  1235.62   951.354
549700  1610841420000  1235.61  1238.33  1235.01  1235.01  1230.220
549701  1610841480000  1235.04  1235.04  1230.40  1230.91  3283.090
549702  1610841540000  1230.97  1232.79  1229.64  1230.19  3080.730


In [19]:
df = pd.read_csv('binance-public-data-master//binance-public-data-master\python\data\ETHUSDT.csv',index_col=0)
df['datetime'] = (df.timestamp/1000).apply(datetime.utcfromtimestamp)
del df['timestamp']
print(df.head())
df.to_csv('ETHUSDT.csv')

     open    high     low   close   volume            datetime
0  128.95  129.04  128.93  128.94  695.566 2020-01-01 00:01:00
1  128.93  128.97  128.92  128.92  131.902 2020-01-01 00:02:00
2  128.97  128.98  128.93  128.96  455.572 2020-01-01 00:03:00
3  128.95  128.95  128.76  128.81  866.117 2020-01-01 00:04:00
4  128.78  128.81  128.71  128.75  265.479 2020-01-01 00:05:00
