# Imports

In [46]:
import pandas as pd
import QuantLib as ql
from datetime import datetime

# Load data

In [47]:
# Read the bond/auction panel and keep only observations whose first index
# level (the timestamp) is on or after 2010-01-01.
data = (
    pd.read_parquet('/Users/vittoriomanfriani/Desktop/bond_data_auction.parquet')
    .loc[lambda frame: frame.index.get_level_values(0) >= '2010-01-01']
)
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,dv01,coupon,maturity,price,yield,first_datapoint,time to maturity,record_date,cusip,security_type,...,treas_retail_accepted,treas_retail_tenders_accepted,unadj_accrued_int_per1000,unadj_price,xml_filenm_announcemt,xml_filenm_comp_results,inflation_index_security,tint_cusip_1,tint_cusip_2,is_tap
timestamp,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2010-01-01,US912810DP04,5.878,11.25,2015-02-15,141.125,2.607,1990-01-03,5.122519,1985-05-15,912810DP0,Bond,...,,,,,,,No,,,False
2010-01-01,US912810DS43,6.362,10.625,2015-08-15,140.546875,2.774,1990-01-03,5.61807,1985-08-15,912810DS4,Bond,...,,,,,,,No,,,False
2010-01-01,US912810DT26,6.506,9.875,2015-11-15,137.46875,2.881,1990-01-03,5.869952,1985-11-29,912810DT2,Bond,...,,,,,,,No,,,False
2010-01-01,US912810DV71,6.656,9.25,2016-02-15,134.796875,2.981,1990-01-03,6.121834,1986-02-18,912810DV7,Bond,...,,,,,,,No,,,False
2010-01-01,US912810DW54,6.521,7.25,2016-05-15,123.875,3.087,1990-01-03,6.368241,1986-08-15,912810DW5,Bond,...,,,,,,,No,,,False


In [48]:
# Prepare the dataset.
# Rename first, then select by the final column names: renaming a column that
# is no longer present is a no-op, so this cell can be re-run safely (the
# previous select-then-rename order raised KeyError on 'maturity' on re-run).
data = data.rename(columns={
    "maturity": "maturity_date"
})[['coupon', 'maturity_date', 'yield', 'price', 'time to maturity', 'issue_date']]

In [49]:
# Inspect the prepared frame: one row per (timestamp, id) pair with the six
# columns kept above.
data

Unnamed: 0_level_0,Unnamed: 1_level_0,coupon,maturity_date,yield,price,time to maturity,issue_date
timestamp,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-01,US912810DP04,11.250,2015-02-15,2.607000,141.125000,5.122519,1985-05-15
2010-01-01,US912810DS43,10.625,2015-08-15,2.774000,140.546875,5.618070,1985-08-15
2010-01-01,US912810DT26,9.875,2015-11-15,2.881000,137.468750,5.869952,1985-11-29
2010-01-01,US912810DV71,9.250,2016-02-15,2.981000,134.796875,6.121834,1986-02-18
2010-01-01,US912810DW54,7.250,2016-05-15,3.087000,123.875000,6.368241,1986-08-15
...,...,...,...,...,...,...,...
2024-11-01,US912828Y958,1.875,2026-07-31,5.992257,96.083984,1.744011,2019-07-31
2024-11-04,US912828Y958,1.875,2026-07-31,6.004602,96.156250,1.735797,2019-07-31
2024-11-05,US912828Y958,1.875,2026-07-31,6.019502,96.128906,1.733060,2019-07-31
2024-11-06,US912828Y958,1.875,2026-07-31,6.036694,96.013672,1.730322,2019-07-31


# Spot Rates Calculation

In [50]:
def pydatetime_to_quantlib_date(py_datetime: datetime) -> ql.Date:
    """Build a QuantLib ``Date`` from a Python ``datetime``.

    Only the ``day``, ``month`` and ``year`` attributes of ``py_datetime``
    are read; any time-of-day information is not carried over.
    """
    day, month, year = py_datetime.day, py_datetime.month, py_datetime.year
    return ql.Date(day, month, year)


def quantlib_date_to_pydatetime(ql_date: ql.Date):
    """Build a Python ``datetime`` (at midnight) from a QuantLib ``Date``.

    The year, month and day-of-month are read from ``ql_date``; the time
    fields of the returned ``datetime`` are left at their defaults.
    """
    return datetime(
        year=ql_date.year(),
        month=ql_date.month(),
        day=ql_date.dayOfMonth(),
    )


In [56]:
def get_spot_rates(yieldcurve, day_count,
                   calendar=ql.UnitedStates(ql.UnitedStates.GovernmentBond), months=361):
    """Sample monthly spot (zero) rates from a QuantLib yield curve.

    The curve is queried at 0, 1, ..., ``months - 1`` months from its
    reference date. Each rate is requested with semiannual compounding,
    converted to ``day_count`` over the period from the reference date to the
    calendar-advanced date, and stored multiplied by 100.

    Parameters
    ----------
    yieldcurve
        Object exposing ``referenceDate()`` and ``zeroRate(t, comp, freq)``.
    day_count
        QuantLib day counter passed to ``equivalentRate``.
    calendar
        QuantLib calendar used to advance the reference date month by month.
    months
        Number of monthly points to sample.

    Returns
    -------
    pd.DataFrame
        Columns ``"Maturities"`` (``month / 12``) and ``"Curve"``
        (rate times 100); every row carries an empty-string index label.

    Notes
    -----
    If a ``RuntimeError`` is raised while querying a point, the error is
    printed, sampling stops, and the points collected so far are returned.
    """
    start = yieldcurve.referenceDate()
    # Loop-invariant conventions for every sampled rate.
    compounding, freq = ql.Compounded, ql.Semiannual

    rows = []
    for month in range(months):
        yrs = month / 12.0
        d = calendar.advance(start, ql.Period(month, ql.Months))

        try:
            base_rate = yieldcurve.zeroRate(yrs, compounding, freq)
            converted = base_rate.equivalentRate(day_count, compounding, freq, start, d)
            rows.append((yrs, 100 * converted.rate()))
        except RuntimeError as e:
            print(f"Error processing month {month}, date {d}: {e}")
            break

    return pd.DataFrame(rows,
                        columns=["Maturities", "Curve"],
                        index=[''] * len(rows))

# Spot Rates Bootstrapper

In [57]:
# Bootstrap a discount curve from bond prices and sample its spot rates

def get_spot_rates_bootstrapper(curve_set_df: pd.DataFrame, current_date: datetime):
    """Bootstrap a discount curve from bond quotes and return monthly spot rates.

    One ``ql.FixedRateBondHelper`` is built per distinct maturity date found in
    ``curve_set_df``; the helpers feed a ``ql.PiecewiseLogCubicDiscount`` curve
    (extrapolation enabled), which is then sampled by ``get_spot_rates``.

    Parameters
    ----------
    curve_set_df
        One row per bond, with columns ``"maturity_date"``, ``"price"`` and
        ``"coupon"`` (coupon is divided by 100 before being passed to QuantLib).
    current_date
        Valuation date; it is adjusted to a business day on the US government
        bond calendar before use.

    Returns
    -------
    pd.DataFrame
        The frame produced by ``get_spot_rates`` for the bootstrapped curve.

    Notes
    -----
    * Sets QuantLib's evaluation date on the shared ``ql.Settings`` instance.
    * Rows are skipped (with a printed message) when a required field is
      missing, when the bond matures on or before the settlement date, or when
      an earlier row already supplied the same maturity date.
    """
    calendar = ql.UnitedStates(m=ql.UnitedStates.GovernmentBond)
    current_date = calendar.adjust(pydatetime_to_quantlib_date(py_datetime=current_date))
    ql.Settings.instance().evaluationDate = current_date

    t_plus = 1
    bond_settlement_date = calendar.advance(current_date, ql.Period(t_plus, ql.Days))
    frequency = ql.Semiannual
    # NOTE(review): ISDA Actual/Actual is used for coupon accrual; confirm this
    # is the intended convention for these bonds.
    day_count = ql.ActualActual(ql.ActualActual.ISDA)
    par = 100.0
    required_columns = ("maturity_date", "price", "coupon")

    bond_helpers = []
    seen_maturities = set()

    # Iterate over the dataset
    for bond_id, row in curve_set_df.iterrows():
        # A missing maturity, price or coupon cannot be turned into a helper;
        # skip the row instead of letting it abort the whole bootstrap.
        if any(pd.isna(row[column]) for column in required_columns):
            print(f"Skipping bond with missing data: {bond_id}")
            continue

        maturity = pydatetime_to_quantlib_date(row["maturity_date"])

        # A schedule needs its start strictly before its end, so bonds that
        # have matured by settlement cannot be scheduled.
        if maturity <= bond_settlement_date:
            print(f"Skipping bond maturing on or before settlement: {maturity}")
            continue

        # Skip duplicate maturities
        if maturity in seen_maturities:
            print(f"Skipping duplicate maturity date: {maturity}")
            continue

        seen_maturities.add(maturity)

        # NOTE(review): the schedule starts at the settlement date rather than
        # the bond's issue date, so the first coupon period is a stub measured
        # from settlement; confirm this approximation is intended.
        schedule = ql.Schedule(
            bond_settlement_date,
            maturity,
            ql.Period(frequency),
            calendar,
            ql.ModifiedFollowing,
            ql.ModifiedFollowing,
            ql.DateGeneration.Backward,
            False,
        )
        helper = ql.FixedRateBondHelper(
            ql.QuoteHandle(ql.SimpleQuote(float(row["price"]))),
            t_plus,
            par,  # face amount
            schedule,
            [float(row["coupon"]) / 100],
            day_count,
            ql.ModifiedFollowing,
            par,  # redemption
        )
        bond_helpers.append(helper)

    # Create the yield curve
    yc_logcubicdiscount = ql.PiecewiseLogCubicDiscount(current_date, bond_helpers, day_count)
    yc_logcubicdiscount.enableExtrapolation()

    # Get spot rates, sampling on the same calendar used to build the helpers
    splcd = get_spot_rates(yc_logcubicdiscount, day_count, calendar=calendar)

    return splcd

# Tests

In [64]:
# Cross-section of every bond quoted on the first timestamp in the panel
# (data.index[0] is the first (timestamp, id) pair).
curve_set_df = data.loc[data.index[0][0]]

In [59]:
# Bootstrap the curve for the first timestamp in the panel.
current_date = data.index[0][0]
zero_rate_curve = get_spot_rates_bootstrapper(
    curve_set_df=curve_set_df,
    current_date=current_date,
)

Skipping duplicate maturity date: February 15th, 2010
Skipping duplicate maturity date: February 15th, 2015
Skipping duplicate maturity date: August 15th, 2010
Skipping duplicate maturity date: August 15th, 2015
Skipping duplicate maturity date: November 15th, 2015
Skipping duplicate maturity date: February 15th, 2016
Skipping duplicate maturity date: May 15th, 2016
Skipping duplicate maturity date: November 15th, 2016
Skipping duplicate maturity date: February 15th, 2010
Skipping duplicate maturity date: May 15th, 2010
Skipping duplicate maturity date: May 15th, 2017
Skipping duplicate maturity date: August 15th, 2017
Skipping duplicate maturity date: May 15th, 2018
Skipping duplicate maturity date: November 15th, 2018
Skipping duplicate maturity date: February 15th, 2012
Skipping duplicate maturity date: February 15th, 2019
Skipping duplicate maturity date: February 28th, 2011
Skipping duplicate maturity date: March 31st, 2011
Skipping duplicate maturity date: April 30th, 2011
Skippi

In [65]:
# Show the bootstrapped spot curve: "Maturities" is month / 12 and "Curve" is
# the rate multiplied by 100.
# NOTE(review): the recorded output shows slightly negative values at the
# 1- and 2-month points; check the short end of the curve.
zero_rate_curve

Unnamed: 0,Maturities,Curve
,0.000000,0.000000
,0.083333,-0.075744
,0.166667,-0.083595
,0.250000,0.003502
,0.333333,0.073365
...,...,...
,29.666667,4.849545
,29.750000,4.849781
,29.833333,4.849526
,29.916667,4.849129
