# TardisStorage Tests

This notebook demonstrates the TardisStorage implementation for reading historical cryptocurrency market data from Tardis.dev.

## Features
- Auto-caching of downloaded data to `/data/tardis/`
- Smart download (only downloads missing files)
- Support for all Tardis data types (trades, orderbook, quotes, etc.)
- Multi-symbol reading
- Market type filtering (SWAP, FUTURE)
- Jupyter-compatible (uses nest_asyncio)

In [1]:
import qubx

%qubxd

from qubx.data import TardisStorage
from qubx.core.basics import DataType


⠀⠀⡰⡖⠒⠒⢒⢦⠀⠀   
⠀⢠⠃⠈⢆⣀⣎⣀⣱⡀  [31mQUBX[0m | [36mQuantitative Backtesting Environment[0m 
⠀⢳⠒⠒⡞⠚⡄⠀⡰⠁         (c) 2025, ver. [35m0.7.27[0m
⠀⠀⠱⣜⣀⣀⣈⣦⠃⠀⠀⠀ 
        


In [2]:
# Build the storage entry point (constructed with no arguments), then show the
# object itself and the API endpoint it reports.
storage = TardisStorage()
print("Storage: {}".format(storage))
print("API URL: {}".format(storage.api_url))

Storage: <qubx.data.storages.tardis.TardisStorage object at 0x74468df6cda0>
API URL: https://api.tardis.dev/v1


## TardisStorage Methods

In [3]:
# List the exchanges reported by the storage and preview the first ten.
exchanges = storage.get_exchanges()
print("Total exchanges: {}".format(len(exchanges)))
print("\nFirst 10 exchanges:")
for exchange_name in exchanges[:10]:
    print("  - {}".format(exchange_name))

Total exchanges: 59

First 10 exchanges:
  - bitmex
  - deribit
  - binance-futures
  - binance-delivery
  - binance-options
  - binance-european-options
  - binance
  - ftx
  - okex-futures
  - okex-options


In [6]:
# Contract (market) types offered for the exchange:
# SWAP = perpetual contracts, FUTURE = dated contracts.
market_types = storage.get_market_types("BINANCE.UM")
print("Available market types for binance-futures: {}".format(market_types))

# Data types here are the channels, i.e. the kinds of data available.
data_types = storage.get_data_types("BINANCE.UM")
print("\nAvailable data channels ({}):".format(len(data_types)))
for channel in data_types:
    print("  - {}".format(channel))

Available market types for binance-futures: ['FUTURE', 'SWAP']

Available data channels (17):
  - trade
  - aggTrade
  - ticker
  - depth
  - depthSnapshot
  - markPrice
  - bookTicker
  - forceOrder
  - openInterest
  - recentTrades
  - compositeIndex
  - assetIndex
  - topLongShortAccountRatio
  - topLongShortPositionRatio
  - globalLongShortAccountRatio
  - takerlongshortRatio
  - !contractInfo


In [7]:
# Symbols listed for the exchange: overall count plus the first 20 entries.
symbols = storage.get_symbols("BINANCE.UM")
print("Total symbols: {}".format(len(symbols)))
print("\nFirst 20 symbols:")
print(symbols[:20])

Total symbols: 763

First 20 symbols:
['btcusdt', 'ethusdt', 'bchusdt', 'xrpusdt', 'ltcusdt', 'trxusdt', 'etcusdt', 'linkusdt', 'xlmusdt', 'adausdt', 'xmrusdt', 'dashusdt', 'zecusdt', 'xtzusdt', 'atomusdt', 'bnbusdt', 'ontusdt', 'iotausdt', 'batusdt', 'vetusdt']


In [8]:
# Query market types using the Qubx exchange identifier (the Qubx id
# "BINANCE.UM" corresponds to Tardis' "binance-futures").
# The identifier lives in a single variable so the printed label always shows
# exactly the value that was passed to the call.
qubx_exchange = "BINANCE.UM"
market_types_qubx = storage.get_market_types(qubx_exchange)
print(f"Market types for '{qubx_exchange}' (Qubx convention): {market_types_qubx}")

Market types for 'binance.um' (Qubx convention): ['FUTURE', 'SWAP']


## TardisReader Methods

In [9]:
# Obtain a reader scoped to perpetual (SWAP) contracts through the current API,
# storage.get_reader(exchange, market_type), then summarise what it exposes.
reader = storage.get_reader("BINANCE.UM", "SWAP")
print("Reader exchange: {}".format(reader.exchange))
print("Reader market type: {}".format(reader.market_type))
print("Available symbols: {}".format(len(reader.get_data_id())))
print("Available data types: {}".format(reader.get_data_types("BTCUSDT")))

Reader exchange: binance-futures
Reader market type: perpetual
Available symbols: 717
Available data types: [trade, orderbook, funding_rate, quote, liquidation]


In [10]:
# Time span reported for BTCUSDT trade data.
time_range = reader.get_time_range("BTCUSDT", DataType.TRADE)
print("BTCUSDT time range: {}".format(time_range))

# Data types offered for the symbol (derived from the available channels).
dtypes = reader.get_data_types("BTCUSDT")
print("BTCUSDT available data types: {}".format(dtypes))

BTCUSDT time range: (numpy.datetime64('2019-11-17T00:00:00.000'), numpy.datetime64('2025-12-31T00:00:00.000000'))
BTCUSDT available data types: [trade, orderbook, funding_rate, quote, liquidation]


## Reading Data

In [11]:
# Load BTCUSDT trades for the requested dates (cached data is reused when
# available) and print a short summary of the returned container.
data = reader.read("BTCUSDT", DataType.TRADE, "2024-11-01", "2024-11-02")
print("Data type: {}".format(type(data).__name__))
print("Data ID: {}".format(data.data_id))
print("Columns: {}".format(data.names))
print("Rows: {:,}".format(len(data)))
print("Time interval: {}".format(data.get_time_interval()))

Data type: RawData
Data ID: BTCUSDT
Columns: ['exchange', 'symbol', 'timestamp', 'local_timestamp', 'id', 'side', 'price', 'amount']
Rows: 6,019,836
Time interval: (1730419200039000, 1730591999963000)


In [12]:
# Convert the raw container into a pandas DataFrame.
# Tardis timestamps are in microseconds, hence timestamp_units="us".
from qubx.data.transformers import PandasFrame

to_frame = PandasFrame(timestamp_units="us")
df = data.transform(to_frame)
print("DataFrame shape: {}".format(df.shape))
print("\nFirst 5 rows:")
# Keep the frame as the last expression so the notebook renders it richly.
df.head()

DataFrame shape: (6019836, 7)

First 5 rows:


Unnamed: 0_level_0,exchange,symbol,local_timestamp,id,side,price,amount
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-11-01 00:00:00.039,binance-futures,BTCUSDT,1730419200043553,5522940885,sell,70321.9,0.002
2024-11-01 00:00:04.029,binance-futures,BTCUSDT,1730419204033384,5522940886,buy,70322.0,0.042
2024-11-01 00:00:04.029,binance-futures,BTCUSDT,1730419204033386,5522940887,buy,70322.0,0.029
2024-11-01 00:00:04.031,binance-futures,BTCUSDT,1730419204034457,5522940888,buy,70322.0,0.002
2024-11-01 00:00:04.033,binance-futures,BTCUSDT,1730419204036691,5522940889,sell,70321.9,0.001


In [19]:
# Tardis timestamp format explanation
# Tardis uses MICROSECONDS (us), not nanoseconds (ns)
import pandas as pd

# Locate the timestamp column by name instead of a hard-coded position, so the
# example does not silently read the wrong column if the column order changes.
ts_col = data.names.index("timestamp")
raw_ts = data.raw[0][ts_col]  # first row, timestamp column
print(f"Raw timestamp value: {raw_ts}")
print(f"Interpreted as microseconds: {pd.Timestamp(raw_ts, unit='us')}")
print("\nWhen using PandasFrame, use: PandasFrame(timestamp_units='us')")

Raw timestamp value: 1730419200039000
Interpreted as microseconds: 2024-11-01 00:00:00.039000

When using PandasFrame, use: PandasFrame(timestamp_units='us')


In [21]:
# Read multiple symbols at once
multi_data = reader.read(["BTCUSDT", "ETHUSDT"], DataType.TRADE, "2024-11-01", "2024-11-02")
print(f"Data type: {type(multi_data).__name__}")
print(f"Data IDs: {multi_data.get_data_ids()}")
print(f"\nRows per symbol:")
for data_id in multi_data.get_data_ids():
    print(f"  {data_id}: {len(multi_data[data_id]):,} rows")

Data type: RawMultiData
Data IDs: ['BTCUSDT', 'ETHUSDT']

Rows per symbol:
  BTCUSDT: 6,019,836 rows
  ETHUSDT: 6,405,805 rows


In [3]:
# Shorthand access via __getitem__: storage[exchange, market_type].
# The second key is a market type (e.g. "SWAP"), not a data channel, and the
# reader reports it through its `market_type` attribute.
# Needs `storage` to exist, so run the TardisStorage initialization cell first.
reader2 = storage["binance-futures", "SWAP"]
print(f"Reader via shorthand: {reader2.exchange} / {reader2.market_type}")

NameError: name 'storage' is not defined

In [14]:
# Index the storage with an (exchange, market type) pair to get a reader
# directly, then show what it resolved to.
reader2 = storage["binance-futures", "SWAP"]
print("Reader via shorthand: {} / {}".format(reader2.exchange, reader2.market_type))

Reader via shorthand: binance-futures / perpetual


## Configuration & Cache

In [15]:
# Look the storage up by its registered name in StorageRegistry.
from qubx.data.registry import StorageRegistry

storage_from_registry = StorageRegistry.get("tardis")
print("Storage from registry: {}".format(type(storage_from_registry).__name__))
print("Is registered: {}".format(StorageRegistry.is_registered("tardis")))

Storage from registry: TardisStorage
Is registered: True


In [16]:
# The same reader serves other data types as well: the dtype argument of
# read() selects what is loaded. Here: order book data for BTCUSDT.
print("Reading ORDERBOOK data...")
ob_data = reader.read("BTCUSDT", DataType.ORDERBOOK, "2024-11-01", "2024-11-02")
print("Orderbook data type: {}".format(type(ob_data).__name__))
print("Columns: {}".format(ob_data.names))
print("Rows: {:,}".format(len(ob_data)))

Reading ORDERBOOK data...


Orderbook data type: RawData
Columns: ['exchange', 'symbol', 'timestamp', 'local_timestamp', 'is_snapshot', 'side', 'price', 'amount']
Rows: 186,295,191


In [17]:
# Show cached data structure: one directory per exchange, one sub-directory per
# data type, each holding *.csv.gz files.
from pathlib import Path
from qubx.data.storages.tardis import TARDIS_DATA_DIR, TARDIS_API_KEY

print(f"Data directory: {TARDIS_DATA_DIR}")
# Only report whether a key is configured; the key itself is never printed.
print(f"API key set: {'Yes' if TARDIS_API_KEY else 'No'}")

print("\nCached data in TARDIS_DATA_DIR:")
if not TARDIS_DATA_DIR.is_dir():
    # iterdir() raises FileNotFoundError on a missing directory (e.g. nothing
    # has been downloaded yet), so report that instead of failing the cell.
    print("  (directory does not exist - no cached data)")
else:
    for exchange_dir in sorted(TARDIS_DATA_DIR.iterdir()):
        # Skip plain files and hidden directories.
        if not exchange_dir.is_dir() or exchange_dir.name.startswith('.'):
            continue
        print(f"\n{exchange_dir.name}/")
        for data_type_dir in sorted(exchange_dir.iterdir()):
            if data_type_dir.is_dir():
                files = list(data_type_dir.glob("*.csv.gz"))
                print(f"  {data_type_dir.name}/ ({len(files)} files)")

Data directory: /data/tardis
API key set: No

Cached data in TARDIS_DATA_DIR:

binance/
  trades/ (1 files)

binance-futures/


  book_snapshot_25/ (350 files)


  incremental_book_L2/ (11179 files)
  quotes/ (30 files)


  trades/ (11179 files)

bitmex/
  incremental_book_L2/ (302 files)
  trades/ (302 files)

bybit/
  converted/ (0 files)
  incremental_book_L2/ (300 files)
  trades/ (300 files)

cryptofacilities/
  incremental_book_L2/ (302 files)
  trades/ (302 files)

deribit/
  incremental_book_L2/ (302 files)
  trades/ (302 files)

exhange_infos/

tmp/


In [18]:
# Dated futures: request a reader for the FUTURE market type and inspect it.
future_reader = storage.get_reader("binance-futures", "FUTURE")
print("Future contracts reader:")
print("  Market type: {}".format(future_reader.market_type))
print("  Available dated futures: {}".format(len(future_reader.get_data_id())))
print("  Example symbols: {}".format(future_reader.get_data_id()[:5]))

Future contracts reader:
  Market type: future
  Available dated futures: 46
  Example symbols: ['BTCUSDT_260327', 'ETHUSDT_260327', 'BTCUSDT_260626', 'ETHUSDT_260626', 'BTCUSDT_251226']


In [27]:
# Memory-efficient iteration: with chunksize set, read() is consumed piece by
# piece. Tardis data is stored in daily files, so chunksize is always treated
# as 1 and each chunk covers one day.
print("Chunked reading (day-by-day):")
daily_chunks = reader.read("BTCUSDT", DataType.TRADE, "2024-11-01", "2024-11-03", chunksize=1)
for day_no, chunk in enumerate(daily_chunks, start=1):
    df = chunk.transform(PandasFrame(timestamp_units="us"))
    print("  Day {}: {:,} rows, {}".format(day_no, len(chunk), df.index[0].date()))

Chunked reading (day-by-day):
  Day 1: 4,618,172 rows, 2024-11-01
  Day 2: 1,401,664 rows, 2024-11-02
