In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import yfinance as yf
import time
import os

# Yahoo Finance mutual-fund listing pages to scrape. Each key is the label
# stored in the "Category" column of the output; each value is the base URL
# (pagination query parameters are appended by the scraper).
category_urls = {
    "MF Gainers": "https://finance.yahoo.com/markets/mutualfunds/gainers",
    "MF Top Performing": "https://finance.yahoo.com/markets/mutualfunds/top-performing",
    "MF Best Historical": "https://finance.yahoo.com/markets/mutualfunds/best-historical-performance",
}

# HTTP request headers: a browser-like User-Agent plus a contact address
# sent in the standard "From" header.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
    "From": "hrahman@ucdavis.edu",
}

# Make sure the output folders exist before anything is written to them.
for folder in ("./historical-data/mutual-funds", "./market-data"):
    os.makedirs(folder, exist_ok=True)

# Scrape up to `max_results` mutual fund symbols and names
def get_symbols_paginated(url, pages=1, max_results=100, timeout=10):
    """Scrape (symbol, name) pairs from a paginated Yahoo Finance listing.

    The first two cells of every row matched by ``table tbody tr`` are taken
    as the symbol and the fund name. Rows with fewer than two cells are
    skipped.

    Args:
        url: Base URL of the listing page, without a query string.
        pages: Maximum number of pages to request (100 rows are requested
            per page).
        max_results: Stop as soon as this many pairs have been collected.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A list of ``(symbol, name)`` tuples in page order. The list is empty
        when nothing could be parsed.

    Raises:
        requests.RequestException: If a request fails at the transport level
            (connection error, timeout, ...).
    """
    page_size = 100
    results = []
    if max_results <= 0:
        return results

    for page in range(pages):
        response = requests.get(
            url,
            params={"start": page * page_size, "count": page_size},
            headers=headers,
            timeout=timeout,
        )
        if not response.ok:
            # An error page holds no listing; report it instead of silently
            # parsing it, and keep whatever was collected so far.
            print(f"⚠️ HTTP {response.status_code} for {response.url}")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.select("table tbody tr")
        if not rows:
            # No table rows on this page, so later pages are not requested.
            break

        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 2:
                continue
            results.append((cols[0].text.strip(), cols[1].text.strip()))
            if len(results) >= max_results:
                return results
    return results

# Get percent change over a given period
def get_change_pct(ticker, period):
    """Return the percent change in closing price over ``period``.

    The change is measured from the first to the last non-missing "Close"
    value returned by ``yfinance`` for the requested period.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        period: Period string passed to ``Ticker.history`` (the callers in
            this file use "5d", "1mo", "3mo", "6mo", "1y" and "5y").

    Returns:
        The percent change rounded to two decimals, or ``None`` when the
        download fails, fewer than two closing prices are available, or the
        starting price is zero.
    """
    try:
        hist = yf.Ticker(ticker).history(period=period)
    except Exception as e:
        # Best-effort metric: report the failure and continue. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit stop the run.
        print(f"⚠️ Error fetching {ticker} ({period}): {e}")
        return None

    if hist.empty or "Close" not in hist:
        return None

    # Drop missing closes so a NaN at either end of the window does not
    # turn the whole result into NaN.
    closes = hist["Close"].dropna()
    if len(closes) < 2:
        return None

    first_close = closes.iloc[0]
    last_close = closes.iloc[-1]
    if first_close == 0:
        # A percent change from a zero base is undefined.
        return None
    return round(float((last_close - first_close) / first_close * 100), 2)

# Collect the percent change for every reporting window
def get_all_changes(symbol):
    """Build one summary row of percent changes for ``symbol``.

    Args:
        symbol: Fund symbol forwarded to ``get_change_pct``.

    Returns:
        A dict whose first key is "Symbol", followed by one
        "<window> Change %" key per reporting window. Each value is whatever
        ``get_change_pct`` returns for that window.
    """
    # (column label, history period) pairs, in output-column order.
    windows = (
        ("1W Change %", "5d"),
        ("1M Change %", "1mo"),
        ("3M Change %", "3mo"),
        ("6M Change %", "6mo"),
        ("1Y Change %", "1y"),
        ("5Y Change %", "5y"),
    )
    summary = {"Symbol": symbol}
    for column, period in windows:
        summary[column] = get_change_pct(symbol, period)
    return summary

# Download the daily price history of one fund and write it to CSV
def save_historical_data(symbol, period="2y"):
    """Save the daily price history of ``symbol`` as a CSV file.

    The file is written to ``historical-data/mutual-funds/<symbol>.csv``.
    Nothing is written when no data comes back. Any exception raised while
    downloading or writing is caught and reported with ``print``.

    Args:
        symbol: Fund symbol passed to ``yf.Ticker``.
        period: History period passed to ``Ticker.history``.

    Returns:
        None.
    """
    try:
        history = yf.Ticker(symbol).history(period=period, interval="1d")
        if not history.empty:
            history.to_csv(f"historical-data/mutual-funds/{symbol}.csv")
            print(f"-> Saved historical data for {symbol}")
        else:
            print(f"-> No data found for {symbol}")
    except Exception as e:
        print(f"⚠️ Error saving {symbol}: {e}")

# Master loop: for every category, scrape the listed funds, compute their
# percent changes, and save each fund's price history.
all_data = []

for category, url in category_urls.items():
    print(f"-----Scraping: {category}")
    symbol_name_pairs = get_symbols_paginated(url, pages=1)

    for symbol, name in symbol_name_pairs:
        print(f"→ {symbol} | {name}")
        data = get_all_changes(symbol)
        data["Name"] = name
        data["Category"] = category
        all_data.append(data)

        save_historical_data(symbol)
        # Pause between funds to avoid hammering the data source.
        time.sleep(1)

# Use one variable for the output path so the confirmation message always
# names the file that was actually written.
output_path = "market-data/mutual-fund-market-data.csv"
df = pd.DataFrame(all_data)
df.to_csv(output_path, index=False)
print(f"-> Saved to {output_path}")
display(df.head())

-----Scraping: MF Gainers
→ FTPAX | First Trust Private Assets Fund


$FTPAX: possibly delisted; no price data found  (period=5d)
$FTPAX: possibly delisted; no price data found  (period=1mo)
$FTPAX: possibly delisted; no price data found  (period=3mo)


-> Saved historical data for FTPAX
→ GRHIX | Goehring & Rozencwajg Resources Instl
-> Saved historical data for GRHIX
→ GRHAX | Goehring & Rozencwajg Resources Retail
-> Saved historical data for GRHAX
→ PMPIX | ProFunds Precious Metals UltraSector Inv
-> Saved historical data for PMPIX
→ PMPSX | ProFunds Precious Metals UltraSector Svc
-> Saved historical data for PMPSX
→ 0P00011WC1 | Kieger Fd I Global Real Assets AP1 USD
-> Saved historical data for 0P00011WC1
→ OCMAX | OCM Gold Atlas
-> Saved historical data for OCMAX
→ FGPMX | Franklin Gold and Precious Metals R6
-> Saved historical data for FGPMX
→ OCMGX | OCM Gold Investor
-> Saved historical data for OCMGX
→ FKRCX | Franklin Gold and Precious Metals A
-> Saved historical data for FKRCX
→ FRGOX | Franklin Gold and Precious Metals C
-> Saved historical data for FRGOX
→ FGADX | Franklin Gold and Precious Metals Adv
-> Saved historical data for FGADX
→ GOLDX | Gabelli Gold AAA
-> Saved historical data for GOLDX
→ INIIX | VanEck Int

$VYSCX: possibly delisted; no price data found  (period=5d)


→ VYSCX | Voya Small Company P3


$VYSCX: possibly delisted; no price data found  (period=1mo)
$VYSCX: possibly delisted; no price data found  (period=3mo)
$VYSCX: possibly delisted; no price data found  (period=6mo)
$VYSCX: possibly delisted; no price data found  (period=1y)


-> No data found for VYSCX
→ FEGOX | First Eagle Gold C
-> Saved historical data for FEGOX
→ FEGIX | First Eagle Gold I
-> Saved historical data for FEGIX
→ SGGDX | First Eagle Gold A
-> Saved historical data for SGGDX
→ FEURX | First Eagle Gold R6
-> Saved historical data for FEURX
→ EPGIX | EuroPac Gold Fund
-> Saved historical data for EPGIX
→ EPGFX | EuroPac Gold Fund
-> Saved historical data for EPGFX
→ QGLDX | Gold Bullion Strategy Investor
-> Saved historical data for QGLDX
→ 0P0000YUAL | Gold Bullion Strategy
-> Saved historical data for 0P0000YUAL
→ QGLCX | Gold Bullion Strategy Advisor
-> Saved historical data for QGLCX
→ 0P0001M32M | CI Bitcoin Series F
-> Saved historical data for 0P0001M32M
→ PSPFX | US Global Investors Global Res
-> Saved historical data for PSPFX
→ MOWNX | Moerus Worldwide Value N
-> Saved historical data for MOWNX
→ 0P0001JA5V | Multipartner Tata India Equity D USD Acc
-> Saved historical data for 0P0001JA5V
→ 0P0001J2MW | Multipartner Tata India Equity

Unnamed: 0,Symbol,1W Change %,1M Change %,3M Change %,6M Change %,1Y Change %,5Y Change %,Name,Category
0,FTPAX,,,,,,11.21,First Trust Private Assets Fund,MF Gainers
1,GRHIX,5.37,10.56,11.44,2.86,0.12,221.84,Goehring & Rozencwajg Resources Instl,MF Gainers
2,GRHAX,5.29,10.47,11.26,2.63,-0.21,216.34,Goehring & Rozencwajg Resources Retail,MF Gainers
3,PMPIX,9.44,-0.77,30.33,50.64,57.56,25.52,ProFunds Precious Metals UltraSector Inv,MF Gainers
4,PMPSX,9.43,-0.85,30.02,49.92,56.04,19.45,ProFunds Precious Metals UltraSector Svc,MF Gainers
