In [30]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import nest_asyncio
# NOTE(review): presumably required so asyncio-based code (e.g. the fetcher used
# further down) can run inside the notebook's already-running event loop — confirm.
nest_asyncio.apply()

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Use the "vscode" renderer as the plotly default for figures shown in this notebook.
pio.renderers.default = "vscode"

import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
# Notebook-wide matplotlib look: ggplot theme, 12x8 figures, x-large text for
# legend, axis labels, titles and tick labels.
plt.style.use("ggplot")
params = {
    "legend.fontsize": "x-large",
    "figure.figsize": (12, 8),
    **dict.fromkeys(
        ("axes.labelsize", "axes.titlesize", "xtick.labelsize", "ytick.labelsize"),
        "x-large",
    ),
}
plt.rcParams.update(params)

import pandas as pd
import numpy as np

import datetime
import pytz
# pytz timezone objects for New York, Chicago and UTC.
NY_tz, CHI_tz, UTC_tz = (
    pytz.timezone(zone_name)
    for zone_name in ("America/New_York", "America/Chicago", "UTC")
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
import datetime
import ujson as json
from pathlib import Path
from typing import Iterable, Optional

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.dataset as ds
import QuantLib as ql
import tqdm


def _df_to_parquet(df: pd.DataFrame, path: Path, *, compression: Optional[str] = "zstd"):
    """Write ``df`` to a Parquet file at ``path``, creating parent directories.

    The input frame is not modified; all work happens on a copy. Before
    writing, every object-dtype column whose values have more than one Python
    type is cast to ``str`` so the column holds a single value type when it is
    handed to pyarrow.

    NOTE(review): the cast stringifies every element of an affected column;
    depending on the pandas version, missing values may end up as the literal
    strings ``"nan"`` / ``"None"`` — confirm this is acceptable downstream.

    Args:
        df: Frame to persist. The index is not written.
        path: Destination file; missing parent directories are created.
        compression: Codec name forwarded to ``pyarrow.parquet.write_table``.

    Returns:
        None.
    """
    out = df.copy()
    for col in out.columns:
        s = out[col]
        # This check must run for each column. It previously sat outside the
        # loop, so only the last column was ever normalised and a frame with
        # no columns failed with NameError on ``s``.
        if s.dtype == "object" and s.map(type).nunique() > 1:
            out[col] = s.astype(str)

    path.parent.mkdir(parents=True, exist_ok=True)
    tbl = pa.Table.from_pandas(out, preserve_index=False)
    pq.write_table(tbl, path, compression=compression)

In [33]:
# Output directory for each benchmark tenor, keyed by the tenor label
# (relative to the notebook's working directory).
PATH_STORE = {
    term: Path(f"./TRACE_{term}")
    for term in ("2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y")
}

In [34]:
from FinraFetcher import FinraDataFetcher
# Shared fetcher instance with all three verbosity flags (debug, info, error) enabled.
ff = FinraDataFetcher(debug_verbose=True, info_verbose=True, error_verbose=True)

In [41]:
# Inclusive date window to download.
start = datetime.datetime(2025, 9, 15)
end = datetime.datetime(2025, 9, 19)
# "B" is the documented business-day alias (Mon-Fri, no holiday calendar);
# the lowercase "1b" spelling is undocumented and deprecated in newer pandas.
bdays = pd.date_range(start=start, end=end, freq="B")
bdays

DatetimeIndex(['2025-09-15', '2025-09-16', '2025-09-17', '2025-09-18',
               '2025-09-19'],
              dtype='datetime64[ns]', freq='B')

In [None]:
# Download TRACE trade history one business day at a time and store one Parquet
# file per benchmark tenor and day under PATH_STORE[<tenor>]/<YYYY-MM-DD>.parquet.
# Failures are reported and skipped so one bad day or tenor does not stop the run.
for bd in bdays:
    try:
        benchmarks = ["2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y"]
        # A single request per day covers all tenors; the result is indexed by tenor label.
        intraday_dict = ff.fetch_historcal_trace_trade_history_by_cusip_v2(
            start_date=bd,
            end_date=bd,
            benchmark_terms=benchmarks,
            # session_timeout_minutes=8,
            # max_connections=8,
            # max_in_flight=3,
            force_close=False,
        )
        for bm in benchmarks:
            # Each tenor is written independently so a failed write only loses that file.
            try:
                target_dir = PATH_STORE[bm]
                target_dir.mkdir(parents=True, exist_ok=True)
                output_path = target_dir / f"{bd.strftime('%Y-%m-%d')}.parquet"
                _df_to_parquet(intraday_dict[bm], output_path)
            except Exception as e:
                print(f"error during write for {bm} on {bd}: {e}")
    except Exception as e:
        print(f"error during fetching for {bd}: {e}")

2025-09-27 17:44:41,612 - FinraDataFetcher - INFO - TRACE - FINRA totals probe took: 65.605s
INFO	Task(Task-1776) FinraDataFetcher:FinraFetcher.py:_run()- TRACE - FINRA totals probe took: 65.605s
2025-09-27 17:44:41,613 - FinraDataFetcher - DEBUG - TRACE - totals: {'2Y': 35822, '3Y': 37802, '5Y': 63586, '7Y': 22161, '10Y': 54780, '20Y': 11990, '30Y': 26319}
DEBUG	Task(Task-1776) FinraDataFetcher:FinraFetcher.py:_run()- TRACE - totals: {'2Y': 35822, '3Y': 37802, '5Y': 63586, '7Y': 22161, '10Y': 54780, '20Y': 11990, '30Y': 26319}
2025-09-27 17:52:33,261 - FinraDataFetcher - INFO - TRACE - Fetch All Took: 471.647s
INFO	Task(Task-1776) FinraDataFetcher:FinraFetcher.py:_run()- TRACE - Fetch All Took: 471.647s
2025-09-27 17:52:33,264 - FinraDataFetcher - INFO - TRACE - Total Time Elapsed: 537.435s
INFO	Task(Task-2) FinraDataFetcher:FinraFetcher.py:fetch_historcal_trace_trade_history_by_cusip_v2()- TRACE - Total Time Elapsed: 537.435s
2025-09-27 17:53:36,324 - FinraDataFetcher - INFO - TRACE 

In [None]:
# for bd in bdays:
#     benchmarks = ["2Y", "3Y", "5Y", "7Y", "10Y", "20Y", "30Y"]
#     for bm in benchmarks:
#         intraday_dict = ff.fetch_historcal_trace_trade_history_by_cusip(
#             start_date=bd,
#             end_date=bd,
#             benchmark_terms=[bm],
#         )
#         PATH_STORE[bm].mkdir(parents=True, exist_ok=True)
#         output_path = PATH_STORE[bm] / f"incomplete_{bd.strftime('%Y-%m-%d')}.parquet"
#         _df_to_parquet(intraday_dict[bm], output_path)