In [1]:
from binance_historical_data import BinanceDataDumper
import pandas as pd
import pathlib
from tqdm import tqdm

In [9]:
data_dumper = BinanceDataDumper(
    path_dir_where_to_dump = '../data/',
    asset_class = 'um',
    data_type = 'fundingRate',
    data_frequency = '5m'
)
x = data_dumper.dump_data(
    tickers = ['BTCUSDT'],
    date_start = None,
    date_end = None,
    is_to_update_existing = True,
    # tickers_to_exclude = ["UST"]
)

Tickers:   0%|          | 0/1 [00:00<?, ?it/s]

monthly files to download:   0%|          | 0/47 [00:00<?, ?files/s]

daily files to download:   0%|          | 0/3 [00:00<?, ?files/s]

In [None]:
def process_funding_rate_data(token_lists, time: str):
    """
    futures
    :param columns: for data columns
    :param token_lists: tokens to get data for
    :return:
    """
    path = pathlib.Path.cwd().parent / 'data' / "futures" / 'um' / 'monthly' / 'fundingRate'
    for token in token_lists:
        try:
            files = path / token / f'{time}'
            csv_files = files.glob('*.csv')
            dfs = []
            for file in csv_files:
                df = pd.read_csv(file, index_col=None, header = 0)
                dfs.append(df)
            df_all = pd.concat(dfs, axis=0, ignore_index=True)
        except Exception as e:
            print(f'Error for {token}: {e}')
    return df_all

In [14]:
def process_funding_data_spot(columns, token : str, time: str):
    """
    futures
    :param columns: for data columns
    :param token: single token to get data for (ie: BTCUSDT)
    :return:
    """
    monthly_path = pathlib.Path.cwd().parent / 'data' / "futures" / "um" / 'monthly' / 'fundingRate'
    all_paths = [monthly_path]
    # for token in token_lists:
    df_final = []
    for path in all_paths:
            try:
                files = path / token
                csv_files = files.glob('*.csv')
                dfs = []
                for file in tqdm(csv_files):
                    df = pd.read_csv(file, index_col=None, header = 0)
                    for col in columns:
                        df[col] = df[col].astype(float)
                    dfs.append(df)
                print(len(dfs))
                df_all = pd.concat(dfs, axis=0, ignore_index=True)
                df_all = df_all.rename({'calc_time' : 'open_time'}, axis = 1)
                df_final.append(df_all)
            except Exception as e:
                print(f'Error for {token}: {e}')
    df_final = pd.concat(df_final, axis = 0)
    df_final = df_final.drop_duplicates(subset = ['open_time'])
    return df_final

from ta import add_all_ta_features
def load_funding_rate_data(universe, columns, save = True, time = '1m'):
    dfs = {}
    sample = {}
    for token in tqdm(universe):
        df = process_funding_data_spot(columns, token, time)
        df['open_time'] = pd.to_datetime(df['open_time'], unit = 'ms')
        df['token'] = token
        df = df.sort_values(by = 'open_time', ignore_index = True)
        if save:
            df.to_feather(f'../data/processed_data/{token}_fundingrate.feather')
        dfs[token] = df
    return dfs

In [15]:
columns = ['calc_time', 'funding_interval_hours', 'last_funding_rate']
universe = ['BTCUSDT']

df = load_funding_rate_data(universe, columns, save = False)

  0%|          | 0/1 [00:00<?, ?it/s]
47it [00:00, 699.79it/s]
100%|██████████| 1/1 [00:00<00:00, 12.96it/s]

47





In [18]:
df['BTCUSDT'].to_feather('../data/df_btc_funding_rate.feather')

  if _pandas_api.is_sparse(col):
