In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import nest_asyncio
# NOTE(review): presumably applied so asyncio-based code can be driven from
# inside the notebook's already-running event loop — confirm it is still needed.
nest_asyncio.apply()

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make "vscode" the default plotly renderer for every figure shown from this kernel.
pio.renderers.default = "vscode"

import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
# Global matplotlib look: ggplot style sheet, 12x8 figures, and "x-large" text
# for the legend, axis labels, axis titles and both tick-label sets.
plt.style.use("ggplot")
params = {
    "legend.fontsize": "x-large",
    "figure.figsize": (12, 8),
    **dict.fromkeys(
        (
            "axes.labelsize",
            "axes.titlesize",
            "xtick.labelsize",
            "ytick.labelsize",
        ),
        "x-large",
    ),
}
pylab.rcParams.update(params)

import pandas as pd
import numpy as np

import datetime
import pytz
# pytz timezone objects for New York, Chicago and UTC.
NY_tz, CHI_tz, UTC_tz = (
    pytz.timezone(zone_name)
    for zone_name in ("America/New_York", "America/Chicago", "UTC")
)

In [2]:
import datetime
import ujson as json
from pathlib import Path
from typing import Iterable, Optional

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.dataset as ds
import QuantLib as ql
import tqdm


def _df_to_parquet(df: pd.DataFrame, path: Path, *, compression: Optional[str] = "zstd") -> None:
    """Write ``df`` to ``path`` as a Parquet file.

    The frame is copied first, so the caller's object is never modified.
    Every object-dtype column whose values are of more than one Python type
    is cast to ``str`` before the Arrow conversion (presumably because Arrow
    cannot infer a single type for such a column — confirm against the data).

    Args:
        df: Frame to persist. Its index is not written.
        path: Destination file. Missing parent directories are created.
        compression: Codec name forwarded to ``pyarrow.parquet.write_table``.
    """
    out = df.copy()
    for col in out.columns:
        s = out[col]
        # This check must live inside the loop so that every column is
        # inspected, not only the last one; it also keeps a frame without any
        # columns from touching an unbound ``s``.
        # NOTE(review): missing values count toward the type tally, so a
        # string column containing None/NaN is treated as mixed as well.
        if s.dtype == "object" and s.map(type).nunique() > 1:
            out[col] = s.astype(str)

    path.parent.mkdir(parents=True, exist_ok=True)
    tbl = pa.Table.from_pandas(out, preserve_index=False)
    pq.write_table(tbl, path, compression=compression)

In [3]:
# Output directory per benchmark tenor: "./TRACE_<tenor>".
PATH_STORE = {
    tenor: Path(f"./TRACE_{tenor}")
    for tenor in ("2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y")
}

In [4]:
from FinraFetcher import FinraDataFetcher
# Fetcher instance with the debug, info and error verbosity flags all enabled.
ff = FinraDataFetcher(debug_verbose=True, info_verbose=True, error_verbose=True)

In [12]:
# Date window to download, expanded to one entry per business day
# (both end points are included).
start = datetime.datetime(2025, 8, 11)
end = datetime.datetime(2025, 8, 15)
# "B" is the canonical business-day alias; the lowercase "1b" spelling is
# deprecated in recent pandas releases.
bdays = pd.date_range(start=start, end=end, freq="B")
bdays

DatetimeIndex(['2025-08-11', '2025-08-12', '2025-08-13', '2025-08-14',
               '2025-08-15'],
              dtype='datetime64[ns]', freq='B')

In [None]:
# For each business day: fetch the TRACE trade history for all benchmarks in
# one call, then write each benchmark's frame to
# PATH_STORE[<benchmark>]/<YYYY-MM-DD>.parquet. Failures are printed and the
# loop carries on (best effort).
for bd in bdays:
    benchmarks = ["2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y"]

    try:
        intraday_dict = ff.fetch_historcal_trace_trade_history_by_cusip_v3(
            start_date=bd, end_date=bd, benchmark_terms=benchmarks
        )
    except Exception as e:
        # Nothing to write for this day; report and move to the next one.
        print(f"error during fetching for {bd}: {e}")
        continue

    for bm in benchmarks:
        try:
            PATH_STORE[bm].mkdir(parents=True, exist_ok=True)
            output_path = PATH_STORE[bm].joinpath(f"{bd.strftime('%Y-%m-%d')}.parquet")
            _df_to_parquet(intraday_dict[bm], output_path)
        except Exception as e:
            # A failure for one benchmark must not stop the remaining ones.
            print(f"error during write for {bm} on {bd}: {e}")

2025-09-27 23:18:49,581 - FinraDataFetcher - INFO - TRACE - FINRA proxy set: DIRECT
INFO	Task(Task-3) FinraDataFetcher:FinraFetcher.py:_get_sticky_proxy_url()- TRACE - FINRA proxy set: DIRECT
2025-09-27 23:19:48,325 - FinraDataFetcher - INFO - TRACE(h2) - FINRA totals probe took: 58.535s
INFO	Task(Task-2773) FinraDataFetcher:FinraFetcher.py:_run()- TRACE(h2) - FINRA totals probe took: 58.535s
2025-09-27 23:19:48,326 - FinraDataFetcher - DEBUG - TRACE(h2) - totals: {'2Y': 18972, '3Y': 29972, '5Y': 50408, '7Y': 19187, '10Y': 42031, '20Y': 8293, '30Y': 21590}
DEBUG	Task(Task-2773) FinraDataFetcher:FinraFetcher.py:_run()- TRACE(h2) - totals: {'2Y': 18972, '3Y': 29972, '5Y': 50408, '7Y': 19187, '10Y': 42031, '20Y': 8293, '30Y': 21590}
2025-09-27 23:22:54,356 - FinraDataFetcher - INFO - TRACE(h2) - Fetch All Took: 186.028s
INFO	Task(Task-2773) FinraDataFetcher:FinraFetcher.py:_run()- TRACE(h2) - Fetch All Took: 186.028s
2025-09-27 23:22:54,357 - FinraDataFetcher - INFO - TRACE(h2) - Total Ti

In [7]:
# for bd in bdays:
#     benchmarks = ["2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y"]
#     for bm in benchmarks:
#         intraday_dict = ff.fetch_historcal_trace_trade_history_by_cusip(
#             start_date=bd,
#             end_date=bd,
#             benchmark_terms=[bm],
#         )
#         PATH_STORE[bm].mkdir(parents=True, exist_ok=True)
#         output_path = PATH_STORE[bm] / f"incomplete_{bd.strftime('%Y-%m-%d')}.parquet"
#         _df_to_parquet(intraday_dict[bm], output_path)