In [1]:
import json
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import os
import pandas as pd 

In [2]:
class Scraper: 
    def __init__(self): 
        self.today = datetime.today().date()
        self.today_str = self.today.strftime('%Y-%m-%d')
        self.get_current_month() 
        self.format_payload_url() 
        self.data_cleaned_dir = "./data/data_cleaned/yield_curve_historical_rates_MASTER.parquet"

    def get_current_month(self):
        year, month = self.today.year, self.today.month
        if month < 10:
            month = f"0{month}"
        self.current_month = f"{year}{month}"

    def clean_scraped_date(self, date_str): 
        date_split = date_str.split("/")
        return "-".join([date_split[-1], date_split[0], date_split[1]])

    def format_payload_url(self):
        self.url = f"https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&field_tdr_date_value_month={self.current_month}"

    def scrape_yield_curve_data_json(self):
        response = requests.get(self.url)
        soup = BeautifulSoup(response.text, features="html.parser")
        yc_values_on_date = soup.find_all("tr")[-1]
        parsed_data = [_.text.strip() for _ in yc_values_on_date.__dict__["contents"][1::2]]
        self.date_cleaned = self.clean_scraped_date(parsed_data[0])
        yc_data_json = {
            "Date": self.today_str,
            "1 Mo": float(parsed_data[10]),
            "2 Mo": float(parsed_data[11]),
            "3 Mo": float(parsed_data[12]),
            "6 Mo": float(parsed_data[14]),
            "1 Yr": float(parsed_data[15]),
            "2 Yr": float(parsed_data[16]),
            "3 Yr": float(parsed_data[17]),
            "5 Yr": float(parsed_data[18]),
            "7 Yr": float(parsed_data[19]),
            "10 Yr": float(parsed_data[20]),
            "20 Yr": float(parsed_data[21]),
            "30 Yr": float(parsed_data[22]),
            "2 Yr - 10 Yr": float(parsed_data[16]) - float(parsed_data[20]), 
            "2 Yr - 30 Yr": float(parsed_data[16]) - float(parsed_data[22]),
            "10 Yr - 30 Yr": float(parsed_data[20]) - float(parsed_data[22]),
        }
        return yc_data_json, response.status_code

    def get_yc_data(self, verbose = False):
        if verbose:
            print("Fetching data... ", end = " ")
        try: 
            self.data, self.status_code = self.scrape_yield_curve_data_json()
            if verbose: 
                print(f"successful with status code {self.status_code}.")
        except:
            raise ValueError(f"Attempt failed. Either 'Month' argument invalid or data cannot be parsed as implemented.")

    def save_to_json(self):
        save_dir = "./data/data_scraped"
        if not os.path.exists(save_dir): 
            os.mkdir(save_dir)
        with open(f"{save_dir}/{self.today_str}.json", "w") as newfile:
            json.dump(self.data, newfile)

    def merge_with_parquet(self, override_data = False): 
        df = pd.read_parquet(self.data_cleaned_dir) 
        # df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
        if (self.today_str not in df["Date"].values) or override_data: 
            df_merged = pd.concat([pd.json_normalize(self.data), df])
            df_merged.to_parquet(self.data_cleaned_dir, index = False)
        else: 
            print(f"Final pd.DataFrame object already contains data for {self.today_str}. If you want to override the existing data, set override_data to True.")

In [3]:
scraper = Scraper() 

In [4]:
scraper.get_yc_data(verbose = True) 

Initiating attempt to retrieve Yield Curve data...  successful with status code 200.


In [5]:
scraper.save_to_json()

In [6]:
scraper.merge_with_parquet(override_data = True)

In [7]:
df = pd.read_parquet(scraper.data_cleaned_dir)
df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Yr - 10 Yr,2 Yr - 30 Yr,10 Yr - 30 Yr
0,2024-04-17,5.49,5.51,5.45,5.39,5.18,4.97,4.83,4.69,4.69,4.67,4.88,4.77,0.3,0.2,-0.1
1,2024-04-17,5.49,5.51,5.45,5.39,5.18,4.97,4.83,4.69,4.69,4.67,4.88,4.77,0.3,0.2,-0.1
2,2024-04-17,5.49,5.51,5.45,5.39,5.18,4.97,4.83,4.69,4.69,4.67,4.88,4.77,0.3,0.2,-0.1
3,2024-03-28,5.49,5.48,5.46,5.38,5.03,4.59,4.4,4.21,4.2,4.2,4.45,4.34,0.39,0.25,-0.14
4,2024-03-27,5.5,5.47,5.45,5.36,4.99,4.54,4.36,4.18,4.18,4.2,4.45,4.36,0.34,0.18,-0.16
