# Agent-Augmented Portfolio Construction

## **Objective:**

Design a separately managed account (SMA) or fund portfolio drawn from the S&P 500 constituents that achieves a dividend yield at least 3% higher than the S&P 500 benchmark, while maintaining a beta close to 1. The strategy aims to outperform the S&P 500 Dividend Aristocrats Index while preserving realistic sector exposures and applying sustainability and market-regime overlays.

# Step 1: Data Gathering and Cleaning Pipeline

In [19]:
pip install --upgrade yfinance pandas numpy tqdm

Note: you may need to restart the kernel to use updated packages.


## Import S&P 500 Constituents


In [25]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import asyncio
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from tqdm.asyncio import tqdm_asyncio   # just for a nice progress bar


def get_sp500_constituents() -> pd.DataFrame:
    """
    Return a DataFrame with the current S&P 500 ticker symbols and company names.
    """
    # Wikipedia’s first table on the page is always the constituents list
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    df = pd.read_html(url, header=0)[0]          # grab the first table

    # Clean up column names and tickers
    df.rename(columns={"Symbol": "Ticker"}, inplace=True)
    df["Ticker"] = df["Ticker"].str.replace(".", "-", regex=False)

    return df

def _fetch_one(ticker, div_growth_years, price_return_years):
    t = yf.Ticker(ticker)

    # 1) Dividend yield & payout ratio
    info      = t.fast_info
    div_yield = info.get("dividendYield")          # often None
    # fallback: dividendRate / price
    div_rate  = t.info.get("dividendRate", np.nan)
    price     = info.get("last_price") or t.info.get("previousClose")
    if pd.isna(div_yield) and div_rate and price:
        div_yield = div_rate / price

    payout    = t.info.get("payoutRatio")

    # 2) Dividend CAGR (same as before)
    divs    = t.dividends.copy()
    div_cagr = np.nan
    if len(divs) > 1:
        divs.index = divs.index.tz_localize(None)
        cutoff     = datetime.now() - timedelta(days=365*div_growth_years)
        past       = divs[divs.index < cutoff]
        recent     = divs[divs.index >= cutoff]
        if len(past) and len(recent):
            past_total   = past.resample("Y").sum().iloc[-1]
            recent_total = recent.resample("Y").sum().iloc[-1]
            if past_total > 0:
                div_cagr = (recent_total / past_total) ** (1/div_growth_years) - 1

    # 3) Price return
    hist        = t.history(period=f"{price_return_years}y")["Close"]
    price_ret   = (hist.iloc[-1] / hist.iloc[0] - 1) if len(hist) else np.nan

    # 4) Other ratios
    roe = t.info.get("returnOnEquity")
    dte = t.info.get("debtToEquity")

    return {
        "Ticker": ticker,
        "DividendYield": div_yield,
        "PayoutRatio": payout,
        f"DivCAGR_{div_growth_years}y": div_cagr,
        f"PriceReturn_{price_return_years}y": price_ret,
        "ROE": roe,
        "DebtToEquity": dte,
    }

# ---------- async wrapper ----------
async def get_financial_metrics_async(tickers,
                                      div_growth_years: int = 5,
                                      price_return_years: int = 5,
                                      max_workers: int = 20) -> pd.DataFrame:
    """
    Concurrently fetch metrics for many tickers using asyncio + ThreadPoolExecutor.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    loop = asyncio.get_running_loop()
    rows = []

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        tasks = [
            loop.run_in_executor(
                pool,
                _fetch_one,
                tk,
                div_growth_years,
                price_return_years
            )
            for tk in tickers
        ]

        # tqdm_asyncio gives a progress bar; remove if you don't want it
        for result in tqdm_asyncio.as_completed(tasks, total=len(tasks)):
            rows.append(await result)

    return pd.DataFrame(rows).set_index("Ticker")

# ---------------- example usage ----------------

from time import perf_counter

sp500 = get_sp500_constituents()["Ticker"].tolist()[:50]   # try 50 first
metrics_df = await get_financial_metrics_async(
    sp500,
    div_growth_years=3,
    price_return_years=3,
    max_workers=20        # tune as you wish
)
metrics_df.head().round(4)

  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_total = recent.resample("Y").sum().iloc[-1]
  past_total   = past.resample("Y").sum().iloc[-1]
  recent_tota

Unnamed: 0_level_0,DividendYield,PayoutRatio,DivCAGR_3y,PriceReturn_3y,ROE,DebtToEquity
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABNB,,0.0,,0.3041,0.3206,28.726
ALLE,0.015,0.2739,-0.1464,0.4955,0.4205,133.636
ALGN,,0.0,,-0.2395,0.1084,3.124
AKAM,,0.0,,-0.132,0.0982,100.746
ABBV,0.0344,2.688,0.0517,0.4482,0.884,4789.603


In [26]:
metrics_df

Unnamed: 0_level_0,DividendYield,PayoutRatio,DivCAGR_3y,PriceReturn_3y,ROE,DebtToEquity
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABNB,,0.0,,0.304097,0.3206,28.726
ALLE,0.014989,0.2739,-0.146405,0.495522,0.42052,133.636
ALGN,,0.0,,-0.23946,0.10843,3.124
AKAM,,0.0,,-0.132046,0.09821,100.746
ABBV,0.034371,2.688,0.051659,0.448237,0.884,4789.603
GOOGL,0.004752,0.0893,,0.586865,0.34789,8.254
ACN,0.018834,0.4567,0.151233,0.158925,0.26965,26.706
AMD,,0.0,,0.516887,0.03904,8.174
ARE,0.072687,6.8947,0.047029,-0.38481,0.01476,61.193
A,0.008318,0.2384,-0.161054,0.041317,0.18883,60.039
