# EDA perpetual futures data

There are three files with historical data:
- `<symbol>-<timeframe>-futures.feather`: contains OHLCV historical data.
- `<symbol>-<timeframe>-funding_rate.feather`: contains historical funding rate data.
- `<symbol>-<timeframe>-mark.feather`: contains historical mark price data.

In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px

In [None]:
import logging

# Configure the root logger at DEBUG level so the debug messages emitted in this notebook are shown.
logging.basicConfig(level=logging.DEBUG)
# Logger named after the current module; used by get_feather_path for its debug output.
logger = logging.getLogger(__name__)

In [None]:
# Directory that holds the feather files (placeholder value: point it at the local data directory).
DATA_PATH = Path("/path/to/data/binance-futures")
# Market symbol to analyse; get_feather_path turns "-" and ":" into "_" when building file names.
SYMBOL_MARKET = "BTC-USDT:USDT"

In [None]:
def get_feather_path(_data_path: Path, _data_type: str, _timeframe: str, _symbol_market: str) -> Path:
    """Build the location of the feather file holding one symbol's history.

    The file name follows ``<symbol>-<timeframe>-<data type>.feather``, where
    the symbol has its "-" and ":" separators replaced by underscores
    (e.g. ``BTC-USDT:USDT`` becomes ``BTC_USDT_USDT``).

    Args:
        _data_path: Directory that contains the feather files.
        _data_type: One of "futures", "funding_rate" or "mark".
        _timeframe: One of "1m", "5m" or "8h".
        _symbol_market: Market symbol, e.g. "BTC-USDT:USDT".

    Returns:
        ``_data_path`` joined with the generated file name. The file is not
        checked for existence.

    Raises:
        ValueError: If the data type or the timeframe is not supported.
    """
    known_data_types = ("futures", "funding_rate", "mark")
    known_timeframes = ("1m", "5m", "8h")

    # Guard clauses: the data type is validated before the timeframe.
    if _data_type not in known_data_types:
        raise ValueError(f"Invalid data type: {_data_type}")
    if _timeframe not in known_timeframes:
        raise ValueError(f"Invalid timeframe: {_timeframe}")

    logger.debug("Building file path for %s %s %s %s", _data_path, _data_type, _timeframe, _symbol_market)

    # "-" and ":" both map to "_" in a single pass.
    normalized_symbol = _symbol_market.translate(str.maketrans("-:", "__"))
    feather_path = _data_path / f"{normalized_symbol}-{_timeframe}-{_data_type}.feather"

    logger.debug("Returning feather path: %s", feather_path)
    return feather_path


In [None]:
# Locate and read the 5-minute OHLCV candles for the configured market
file_path = get_feather_path(
    _data_path=DATA_PATH,
    _data_type="futures",
    _timeframe="5m",
    _symbol_market=SYMBOL_MARKET,
)
ohlcv_df = pd.read_feather(file_path)

# Preview: first five rows followed by summary statistics
print("OHLCV Data:")
print(ohlcv_df.head())
print(ohlcv_df.describe())

# Keep rows dated 2024-01-01 or later and reshape to long form:
# one row per (date, variable) pair, with the number in the "value" column
melted_data = ohlcv_df[ohlcv_df["date"] >= "2024-01-01"].melt(
    id_vars=["date"],
    value_vars=["open", "high", "low", "close", "volume"],
)

# One line per OHLCV column over time
px.line(melted_data, x="date", y="value", color="variable").show()


In [None]:
# Locate and read the 8-hour funding rate history for the configured market
file_path = get_feather_path(
    _data_path=DATA_PATH,
    _data_type="funding_rate",
    _timeframe="8h",
    _symbol_market=SYMBOL_MARKET,
)
fr_df = pd.read_feather(file_path)

# Preview: first five rows followed by summary statistics of the funding rate data
print("Funding rate Data:")
print(fr_df.head())
print(fr_df.describe())

# Bar chart of the `open` column per date
# NOTE(review): presumably the funding rate value is stored in `open` - confirm against the data schema
px.bar(fr_df, x="date", y="open").show()


In [None]:
# Load the 8-hour mark data for the configured market
file_path = get_feather_path(DATA_PATH, "mark", "8h", SYMBOL_MARKET)

mark_df = pd.read_feather(file_path)
# Label fixed: this cell loads the "mark" file, not the funding rate file
print("Mark price Data:")
print(mark_df.head())  # print the first 5 rows of the data

# Basic statistics of the mark data
print(mark_df.describe())

# Keep rows dated 2024-01-01 or later and convert wide-form data to long-form data
# NOTE(review): assumes the mark file has the same open/high/low/close/volume columns
# as the OHLCV file - confirm, otherwise melt raises a KeyError
melted_data = pd.melt(
    mark_df.loc[mark_df["date"] >= "2024-01-01", :],
    id_vars=['date'],
    value_vars=['open', 'high', 'low', 'close', 'volume'],
)

# Plotting the mark data, one line per column
px.line(melted_data, x='date', y='value', color='variable').show()
