In [None]:
from __future__ import annotations

import time

import httpx
import polars as pl

In [None]:
# URL template for the fund API on vanguardinvestor.co.uk; the "{}" placeholder
# is filled with one fund slug at a time via `url.format(...)`.
url = "https://www.vanguardinvestor.co.uk/api/funds/{}"

# Fund slugs to download. Each entry is substituted into `url` to build the
# request for that fund.
vanguard_funds = [
    "vanguard-us-equity-index-fund-gbp-acc",
    "vanguard-uk-inflation-linked-gilt-index-fund-gbp-acc",
    "vanguard-ftse-uk-all-share-index-unit-trust-gbp-acc",
    "vanguard-ftse-100-index-unit-trust-gbp-acc",
    "vanguard-esg-developed-world-all-cap-equity-index-fund-uk-gbp-acc",
    "vanguard-ftse-uk-equity-income-index-fund-gbp-acc",
    "vanguard-ftse-developed-europe-ex-uk-equity-index-fund-gbp-acc",
    "vanguard-ftse-developed-world-ex-uk-equity-index-fund-gbp-acc",
    "vanguard-ftse-global-all-cap-index-fund-gbp-acc",
    "vanguard-uk-long-duration-gilt-index-fund-gbp-acc",
]

In [None]:
# Default request headers passed to the httpx client, so they are sent with
# every fund request.
# They look like they were copied from a Firefox session on a fund's
# price-performance page, presumably so the API treats the call like a browser
# request — TODO confirm which of them are actually required.
# NOTE(review): "Accept-Encoding" advertises br and zstd. httpx can only decode
# those when its optional brotli / zstandard dependencies are installed; verify
# the environment, or drop this header so httpx advertises what it supports.
headers = {
    "Host": "www.vanguardinvestor.co.uk",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-GB,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "DNT": "1",
    "Sec-GPC": "1",
    "Connection": "keep-alive",
    "Referer": "https://www.vanguardinvestor.co.uk/investments/vanguard-us-equity-index-fund-gbp-acc/price-performance",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "TE": "trailers",
}

In [None]:
# Top-level keys that are copied out of each fund's JSON response to build the
# per-fund detail record. A key missing from a response raises KeyError.
fields = [
    "id",
    "name",
    "assetClass",
    "inceptionDate",
    "benchmark",
    "currencyCode",
    "sedol",
    "OCF",
]

In [None]:
def format_vanguard_returns(_monthly_returns: list[dict], id: str) -> pl.DataFrame:
    """
    Turn the raw return records of one fund into a tidy polars frame.

    The records are loaded into a DataFrame, a constant ``id`` column holding
    the fund id is attached to every row, and the API column names
    ``asOfDate`` / ``monthPercent`` are renamed to ``date`` /
    ``monthly_return``. Any other columns are passed through unchanged.

    Parameters
    ----------
    _monthly_returns : list[dict]
        Return records as delivered by the Vanguard API, one dict per row.
    id : str
        Fund id written into the ``id`` column of every row.

    Returns
    -------
    pl.DataFrame
        The fund's returns with the ``id``, ``date`` and ``monthly_return``
        columns described above.
    """
    api_to_tidy_names = {"asOfDate": "date", "monthPercent": "monthly_return"}
    return (
        pl.from_dicts(_monthly_returns)
        .with_columns(pl.lit(id).alias("id"))
        .rename(api_to_tidy_names)
    )

In [None]:
# Accumulators filled by the download loop below: one detail dict and one
# returns DataFrame per fund, in the order of `vanguard_funds`.
fund_details = []
fund_returns = []

with httpx.Client(headers=headers) as client:
    for fund in vanguard_funds:
        # Short pause before each request (presumably to go easy on the API).
        time.sleep(0.1)

        r = client.get(url.format(fund))
        # Stop with a clear HTTP error on a non-success response instead of
        # failing later with an unrelated KeyError / JSON decoding error.
        r.raise_for_status()

        # Decode the body once; every lookup below reads from this dict.
        payload = r.json()

        fund_detail = {key: payload[key] for key in fields}
        fund_details.append(fund_detail)

        fund_return = format_vanguard_returns(
            payload["fundData"]["annualNAVReturns"]["returns"], fund_detail["id"]
        )
        fund_returns.append(fund_return)

In [None]:
# Map the API's camelCase / upper-case keys to snake_case column names.
detail_column_names = {
    "assetClass": "asset_class",
    "inceptionDate": "inception_date",
    "currencyCode": "currency_code",
    "OCF": "ocf",
}

# One row per fund, built from the collected detail dicts, then written out as JSON.
details = pl.from_dicts(fund_details).rename(detail_column_names)
details.write_json("../data/processed/fund_details.json", row_oriented=True)

In [None]:
# Stack the per-fund frames on top of each other into a single table.
# Note: this rebinds `fund_returns` from the list of frames to the combined
# DataFrame, so the download cell must be re-run before re-running this one.
fund_returns = pl.concat(fund_returns, how="vertical")
fund_returns.write_parquet("../data/processed/fund_returns.pq")