#**FED extract**

The idea here is to scrape the FED's communications from 2012 onward, apply BERT to them, and compare monetary policies to see whether the FED's decisions influence the ECB's.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.federalreserve.gov"

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# Headings that open the section of interest. They were found by manually
# inspecting the minutes; the second one is only tried when the first is absent.
SECTION_START_MARKERS = (
    "Developments in Financial Markets and Open Market Operations",
    "Developments in Financial Markets and the Federal Reserve's Balance Sheet",
)
# Heading that closes the section of interest.
SECTION_END_MARKER = "Notation Vote"


def extract_relevant_section(full_text):
    """Return the part of *full_text* between a start marker and the end marker.

    The start markers in ``SECTION_START_MARKERS`` are tried in order and the
    first one present in the text wins. The end marker is searched only after
    the start position, so an earlier mention of it cannot produce an empty
    slice.

    Args:
        full_text: Plain text of one minutes page.

    Returns:
        The slice starting at the start marker (inclusive) and stopping just
        before the end marker, or ``""`` when either marker is missing.
    """
    start_index = -1
    for marker in SECTION_START_MARKERS:
        start_index = full_text.find(marker)
        if start_index != -1:
            break
    if start_index == -1:
        return ""

    end_index = full_text.find(SECTION_END_MARKER, start_index)
    if end_index == -1:
        return ""
    return full_text[start_index:end_index]


def fetch_minutes_text(url_path):
    """Download one minutes page and return its relevant section.

    Args:
        url_path: Site-relative URL of the HTML minutes (appended to
            ``BASE_URL``).

    Returns:
        The extracted section, or ``""`` when the response is not OK, the page
        has no ``<div id="article">``, or the section markers are not found.
    """
    response = requests.get(BASE_URL + url_path, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        return ""

    doc = BeautifulSoup(response.text, features="html5lib")
    article_div = doc.find("div", id="article")
    if article_div is None:
        return ""
    return extract_relevant_section(article_div.text.strip())


# The guard keeps the helpers importable without triggering network calls;
# when run as a script or notebook cell the behaviour is the same as before.
if __name__ == "__main__":
    data_log_response = requests.get(
        BASE_URL + "/monetarypolicy/materials/assets/final-recent.json",
        timeout=REQUEST_TIMEOUT,
    )
    if data_log_response.ok:
        # Keep only the entries whose type is "Mn" (labelled "Minute" below).
        data_log = [
            x for x in data_log_response.json()["mtgitems"] if x.get("type") == "Mn"
        ]

        filtered_data = [
            {
                "Date": item["d"],
                "Type": "Minute",
                "Release Date": item.get("dt", "Not Available"),
                # First file flagged as HTML, or None when the entry has none.
                "URL": next(
                    (
                        file.get("url")
                        for file in item.get("files", [])
                        if file.get("name") == "HTML"
                    ),
                    None,
                ),
            }
            for item in data_log
        ]

        # Scrape and process the text of the FED communications. Every row
        # gets a "Text" value so the column always exists and never holds NaN.
        for item in filtered_data:
            if item["URL"] is not None:
                item["Text"] = fetch_minutes_text(item["URL"])
            else:
                item["Text"] = ""

        # Turn into a DataFrame so BERT can be applied to it; passing the
        # columns explicitly drops "URL" and also copes with an empty list.
        communications_df = pd.DataFrame(
            filtered_data, columns=["Date", "Release Date", "Type", "Text"]
        )

        communications_df.to_csv("communications.csv", index=False)
    else:
        # Make the failure visible instead of silently producing no output.
        print(
            "Could not fetch the FOMC materials index "
            f"(HTTP {data_log_response.status_code}); no CSV was written."
        )