In [1]:
import json
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import os
import pandas as pd 

In [28]:
class Scraper:
    """Scrape the latest row of the daily Treasury yield curve table on treasury.gov.

    Attributes set by the constructor:
        today: today's date as a 'YYYY-MM-DD' string; used as the "Date" value
            of the scraped record and as the JSON file name.
        current_month: today's year and month as 'YYYYMM' (URL query value).
        url: the TextView page URL for the current month.

    Attributes set later:
        date_cleaned: date of the scraped row, set by
            scrape_yield_curve_data_json.
        data, status_code: set by get_yc_data.
    """

    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot hang the notebook indefinitely.
    REQUEST_TIMEOUT = 30

    # Position of each maturity within the stripped cell texts of a table row.
    # NOTE(review): index 13 is skipped -- presumably a "4 Mo" column that the
    # historical table does not carry; confirm against the live page layout.
    _TENOR_COLUMNS = {
        "1 Mo": 10,
        "2 Mo": 11,
        "3 Mo": 12,
        "6 Mo": 14,
        "1 Yr": 15,
        "2 Yr": 16,
        "3 Yr": 17,
        "5 Yr": 18,
        "7 Yr": 19,
        "10 Yr": 20,
        "20 Yr": 21,
        "30 Yr": 22,
    }

    def __init__(self):
        """Compute today's date, the current month and the request URL (no I/O)."""
        self.today = datetime.today().date().strftime('%Y-%m-%d')
        self.get_current_month()
        self.format_payload_url()

    def get_current_month(self):
        """Set self.current_month to the 'YYYYMM' of self.today.

        self.today is normally the 'YYYY-MM-DD' string built in __init__, so it
        is parsed back into a datetime first (a str has no .year/.month).
        A date/datetime object is accepted as well.
        """
        today = self.today
        if isinstance(today, str):
            today = datetime.strptime(today, '%Y-%m-%d')
        # %m is zero-padded, which replaces the manual "0{month}" padding.
        self.current_month = today.strftime('%Y%m')

    def clean_scraped_date(self, date_str):
        """Reorder a slash-separated date: 'A/B/C' becomes 'C-A-B'.

        Assumes the page prints dates as MM/DD/YYYY, which yields YYYY-MM-DD.
        """
        date_split = date_str.split("/")
        return "-".join([date_split[-1], date_split[0], date_split[1]])

    def format_payload_url(self):
        """Set self.url to the yield curve page for self.current_month."""
        self.url = f"https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&field_tdr_date_value_month={self.current_month}"

    def scrape_yield_curve_data_json(self):
        """Download the page and parse the last table row into a flat dict.

        Returns:
            (yc_data_json, status_code): the record with "Date", one float per
            maturity and three spread columns, plus the HTTP status code.

        Raises:
            Whatever requests raises on network failure/timeout, IndexError if
            the page has no <tr> or too few cells, ValueError if a cell is not
            numeric.
        """
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, features="html.parser")
        # The last <tr> on the page is taken as the most recent observation.
        yc_values_on_date = soup.find_all("tr")[-1]
        # Every second child starting at 1 is kept; the others are presumably
        # whitespace text nodes between the cells.
        parsed_data = [cell.text.strip() for cell in yc_values_on_date.contents[1::2]]
        self.date_cleaned = self.clean_scraped_date(parsed_data[0])

        rates = {
            label: float(parsed_data[column])
            for label, column in self._TENOR_COLUMNS.items()
        }
        # NOTE(review): the record is stamped with today's date, not with the
        # row's own date (self.date_cleaned); the two may differ when the
        # latest published row is from an earlier day.
        yc_data_json = {"Date": self.today}
        yc_data_json.update(rates)
        yc_data_json["2 Yr - 10 Yr"] = rates["2 Yr"] - rates["10 Yr"]
        yc_data_json["2 Yr - 30 Yr"] = rates["2 Yr"] - rates["30 Yr"]
        yc_data_json["10 Yr - 30 Yr"] = rates["10 Yr"] - rates["30 Yr"]
        return yc_data_json, response.status_code

    def get_yc_data(self, verbose = False):
        """Scrape the data and store it on self.data / self.status_code.

        Args:
            verbose: print progress messages when True.

        Raises:
            ValueError: if the download or the parsing fails; the original
                exception is attached as __cause__.
        """
        if verbose:
            print("Initiating attempt to retrieve Yield Curve data... ", end = " ")
        try:
            self.data, self.status_code = self.scrape_yield_curve_data_json()
            if verbose:
                print(f"successful with status code {self.status_code}.")
        except Exception as exc:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate instead of being reported as a parsing failure.
            raise ValueError("Attempt failed. Either 'Month' argument invalid or data cannot be parsed as implemented.") from exc

    def save_to_json(self, dict_obj):
        """Write dict_obj to ./data/data_scraped/<today>.json.

        The directory, including a missing parent ./data, is created on demand.
        """
        save_dir = "./data/data_scraped"
        os.makedirs(save_dir, exist_ok=True)
        with open(f"{save_dir}/{self.today}.json", "w") as newfile:
            json.dump(dict_obj, newfile)

In [10]:
# Build the scraper; the constructor only derives today's date, the current
# month and the request URL -- nothing is downloaded yet.
scraper = Scraper() 

In [11]:
# Download and parse the latest row; the result is stored on scraper.data and
# scraper.status_code rather than returned.
scraper.get_yc_data(verbose = True) 

Initiating attempt to retrieve Yield Curve data...  successful with status code 200.


In [15]:
# Show the scraped dict as a one-row DataFrame.
pd.json_normalize(scraper.data)

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Yr - 10 Yr,2 Yr - 30 Yr,10 Yr - 30 Yr
0,2024-04-15,5.49,5.51,5.45,5.38,5.16,4.93,4.78,4.65,4.65,4.63,4.85,4.74,0.3,0.19,-0.11


In [31]:
# Load the master parquet file of historical yield-curve rates.
df = pd.read_parquet("./data/data_cleaned/yield_curve_historical_rates_MASTER.parquet")

In [32]:
# Preview the first rows of the historical table.
df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Yr - 10 Yr,2 Yr - 30 Yr,10 Yr - 30 Yr
0,2024-03-28,5.49,5.48,5.46,5.38,5.03,4.59,4.4,4.21,4.2,4.2,4.45,4.34,0.39,0.25,-0.14
1,2024-03-27,5.5,5.47,5.45,5.36,4.99,4.54,4.36,4.18,4.18,4.2,4.45,4.36,0.34,0.18,-0.16
2,2024-03-26,5.5,5.47,5.46,5.36,5.0,4.56,4.38,4.22,4.23,4.24,4.49,4.4,0.32,0.16,-0.16
3,2024-03-25,5.51,5.48,5.46,5.36,5.0,4.54,4.39,4.23,4.25,4.25,4.51,4.42,0.29,0.12,-0.17
4,2024-03-24,5.51,5.47,5.46,5.34,4.98,4.59,4.36,4.2,4.22,4.22,4.47,4.39,0.37,0.2,-0.17


In [33]:
# Render 'Date' as 'YYYY-MM-DD' strings, the same format Scraper.today uses for
# the scraped row. pd.to_datetime keeps the cell re-runnable: on a second run
# the column already holds strings, and a plain `.dt` access would raise.
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y-%m-%d')
df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Yr - 10 Yr,2 Yr - 30 Yr,10 Yr - 30 Yr
0,2024-03-28,5.49,5.48,5.46,5.38,5.03,4.59,4.4,4.21,4.2,4.2,4.45,4.34,0.39,0.25,-0.14
1,2024-03-27,5.5,5.47,5.45,5.36,4.99,4.54,4.36,4.18,4.18,4.2,4.45,4.36,0.34,0.18,-0.16
2,2024-03-26,5.5,5.47,5.46,5.36,5.0,4.56,4.38,4.22,4.23,4.24,4.49,4.4,0.32,0.16,-0.16
3,2024-03-25,5.51,5.48,5.46,5.36,5.0,4.54,4.39,4.23,4.25,4.25,4.51,4.42,0.29,0.12,-0.17
4,2024-03-24,5.51,5.47,5.46,5.34,4.98,4.59,4.36,4.2,4.22,4.22,4.47,4.39,0.37,0.2,-0.17


In [21]:
# Put the freshly scraped row on top of the historical rows. Each input keeps
# its own index, so label 0 occurs twice in `new`; pass ignore_index=True to
# pd.concat if a unique 0..n-1 index is needed.
new = pd.concat([pd.json_normalize(scraper.data), df])

In [24]:
# Preview the combined table; the scraped row should come first.
new.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Yr - 10 Yr,2 Yr - 30 Yr,10 Yr - 30 Yr
0,2024-04-15,5.49,5.51,5.45,5.38,5.16,4.93,4.78,4.65,4.65,4.63,4.85,4.74,0.3,0.19,-0.11
0,2024-03-28,5.49,5.48,5.46,5.38,5.03,4.59,4.4,4.21,4.2,4.2,4.45,4.34,0.39,0.25,-0.14
1,2024-03-27,5.5,5.47,5.45,5.36,4.99,4.54,4.36,4.18,4.18,4.2,4.45,4.36,0.34,0.18,-0.16
2,2024-03-26,5.5,5.47,5.46,5.36,5.0,4.56,4.38,4.22,4.23,4.24,4.49,4.4,0.32,0.16,-0.16
3,2024-03-25,5.51,5.48,5.46,5.36,5.0,4.54,4.39,4.23,4.25,4.25,4.51,4.42,0.29,0.12,-0.17


In [27]:
# Scraper.__init__ already stores `today` as a 'YYYY-MM-DD' string, so it is
# displayed as-is; calling .strftime on a str raises AttributeError.
scraper.today

'2024-04-15'

In [29]:
# merge in scraped data using Scraper.today date 