In [14]:
# Final script: scrape mutual fund symbols and names per Yahoo Finance category (no Selenium),
# then compute price-change percentages per fund and save them to mutual_fund_flows.csv

import requests
from bs4 import BeautifulSoup
import pandas as pd
import yfinance as yf
import time

# Yahoo Finance mutual-fund listing pages, keyed by the label that is later
# stored in each record's "Category" field.
category_urls = dict(
    [
        ("MF Gainers", "https://finance.yahoo.com/markets/mutualfunds/gainers"),
        ("MF Losers", "https://finance.yahoo.com/markets/mutualfunds/losers"),
        ("MF Top Performing", "https://finance.yahoo.com/markets/mutualfunds/top-performing"),
        ("MF Best Historical", "https://finance.yahoo.com/markets/mutualfunds/best-historical-performance"),
    ]
)

# HTTP headers sent along with each listing-page request.
headers = {"User-Agent": "Mozilla/5.0"}

# Scrape mutual fund symbols and names from a paginated Yahoo Finance category
def get_symbols_paginated(url, pages=1, page_size=100, max_results=100, timeout=10):
    """Collect ``(symbol, name)`` pairs from a paginated listing page.

    Each page is requested as ``{url}?start=<offset>&count=<page_size>`` using
    the module-level ``headers``. The first two ``<td>`` cells of every
    ``table tbody tr`` row are taken as symbol and name.

    Args:
        url: Base URL of the listing, without a query string.
        pages: Maximum number of pages to request.
        page_size: Rows requested per page; also the offset step between pages.
        max_results: Upper bound on the number of pairs returned.
        timeout: Seconds to wait for each HTTP response, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A list of ``(symbol, name)`` tuples in page order; empty when nothing
        could be scraped.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    results = []
    for page in range(pages):
        if len(results) >= max_results:
            break

        full_url = f"{url}?start={page * page_size}&count={page_size}"
        response = requests.get(full_url, headers=headers, timeout=timeout)
        if not response.ok:
            # An error page contains no listing; report it instead of silently
            # parsing it, and stop because later pages are unlikely to fare better.
            print(f"Warning: {full_url} returned HTTP {response.status_code}; stopping pagination")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.select("table tbody tr")
        if not rows:
            # No table rows on this page: nothing further to fetch.
            break

        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 2:
                continue
            symbol = cols[0].text.strip()
            name = cols[1].text.strip()
            if not symbol:
                # A blank ticker cannot be looked up downstream.
                continue
            results.append((symbol, name))
            if len(results) >= max_results:
                break
    return results

# Get percent change from yfinance
def get_change_pct(ticker, period):
    """Return the percent change in closing price over ``period``.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        period: Lookback string passed to ``history`` (e.g. "5d", "1mo").

    Returns:
        The change from the first to the last available close, in percent,
        rounded to two decimals. ``None`` when fewer than two valid closes are
        available, when the first close is zero, or when the lookup fails.
    """
    try:
        hist = yf.Ticker(ticker).history(period=period)
        if hist.empty:
            return None
        # Ignore missing closes so a NaN row cannot turn the result into NaN.
        closes = hist["Close"].dropna()
        if len(closes) < 2:
            return None
        first_close = closes.iloc[0]
        last_close = closes.iloc[-1]
        if first_close == 0:
            # A zero baseline has no defined percent change.
            return None
        return round(((last_close - first_close) / first_close) * 100, 2)
    except Exception:
        # Best effort: a failed lookup yields None. Catching Exception rather
        # than everything lets Ctrl-C / kernel interrupt still stop the loop.
        return None

# Percent changes for every lookback window, for a single symbol
def get_all_changes(symbol):
    """Build one record holding ``symbol`` and its change over each lookback.

    Calls ``get_change_pct`` once per window, shortest first, and stores each
    result under its column label.
    """
    lookbacks = (
        ("1W Change %", "5d"),
        ("1M Change %", "1mo"),
        ("3M Change %", "3mo"),
        ("6M Change %", "6mo"),
        ("1Y Change %", "1y"),
        ("5Y Change %", "5y"),
    )
    record = {"Symbol": symbol}
    for column, period in lookbacks:
        record[column] = get_change_pct(symbol, period)
    return record

# Build one record per fund across every configured category
all_data = []

for category, url in category_urls.items():
    print(f"Fetching: {category}")
    symbol_name_pairs = get_symbols_paginated(url, pages=1)

    for symbol, name in symbol_name_pairs:
        print(f"→ {symbol}")
        # Change columns first, then the descriptive fields.
        data = dict(get_all_changes(symbol), Name=name, Category=category)
        all_data.append(data)
        time.sleep(1)  # pause between funds to stay under the rate limit

# Persist the collected records, then preview the first rows
df = pd.DataFrame(all_data)
df.to_csv("mutual_fund_flows.csv", index=False)
print("✅ Saved as mutual_fund_flows.csv")
df.head()

Fetching: MF Gainers
→ FTPAX


ERROR:yfinance:$FTPAX: possibly delisted; no price data found  (period=5d)
ERROR:yfinance:$FTPAX: possibly delisted; no price data found  (period=1mo)
ERROR:yfinance:$FTPAX: possibly delisted; no price data found  (period=3mo)


→ 0P0001UN1A
→ 0P0001A9KH
→ 0P0000A413
→ 0P0001J4GT
→ 0P000029DW
→ 0P0001MIF5
→ 0P0000YZV8
→ 0P00017BEX
→ 0P0000N3QI
→ 0P000021JE


ERROR:yfinance:$0P000021JE: possibly delisted; no price data found  (period=5d)
ERROR:yfinance:$0P00001RUJ: possibly delisted; no price data found  (period=5d)


→ 0P00001RUJ
→ 0P0001Q4ZF


ERROR:yfinance:$0P0001Q4ZF: possibly delisted; no price data found  (period=5d)


→ 0P0001Q4ZG


ERROR:yfinance:$0P0001Q4ZG: possibly delisted; no price data found  (period=5d)


→ PSIJX
→ PSSIX
→ PSSJX
→ PPVIX
→ PPIMX
→ PPQJX
→ PSBJX
→ 0P0001T4DP
→ 0P0001T4DO
→ PSLIX
→ PSPJX
→ 0P0001QEK2
→ HCPIX
→ HCPSX
→ FSHCX
→ 0P0000ZND1
→ 0P0000ZNCZ
→ 0P0001HN8S
→ 0P0000ZND0
→ 0P0001UN0T
→ 0P0001QSWI


ERROR:yfinance:$0P0001QSWI: possibly delisted; no price data found  (period=5d)


→ 0P0001E4ZM
→ 0P0000A30E
→ 0P00014ZDM
→ 0P00014ZDK
→ 0P00014ZDJ
→ 0P00014ZDL
→ 0P0001M2FP
→ ADNPX
→ ADNCX
→ 0P00016N7D
→ ADNRX
→ 0P0001M2FQ
→ ADNIX
→ 0P0001P6NR
→ BIPSX
→ ADNYX
→ ADNAX
→ BIPIX
→ 0P0001K6O1
→ 0P0001K6O0
→ 0P0000YW4P
→ PRIAX
→ 0P0000YW4O
→ ALMCX
→ PVMIX
→ PQIAX
→ ALMAX
→ ASIMX
→ ASYMX
→ ASMZX
→ BGALX
→ 0P0001KYH4
→ 0P0001KYHA
→ 0P0001KYH3
→ 0P0001KYH9
→ 0P0001KYH5
→ 0P0001KYH8
→ 0P0001KYH7
→ 0P0001614Y
→ 0P0001HT9P
→ PPLIX
→ 0P0001F4FX
→ 0P0001F4FY
→ 0P0001O1R9


ERROR:yfinance:$0P0001O1R9: possibly delisted; no price data found  (period=5d)


→ VYSCX


ERROR:yfinance:$VYSCX: possibly delisted; no price data found  (period=5d)
ERROR:yfinance:$VYSCX: possibly delisted; no price data found  (period=1mo)
ERROR:yfinance:$VYSCX: possibly delisted; no price data found  (period=3mo)
ERROR:yfinance:$VYSCX: possibly delisted; no price data found  (period=6mo)
ERROR:yfinance:$VYSCX: possibly delisted; no price data found  (period=1y)


→ 0P0001P72K
→ 0P0001P72I
→ 0P00001U90
→ 0P000198KS
→ 0P0001OUSR
→ 0P0001BRS2
→ 0P0001OTZD
→ 0P0000YW4M
→ 0P0000YW4L
→ UMPIX
→ UMPSX
→ 0P000132RI
→ 0P000132RM
→ 0P0001HD1K
→ 0P000132RJ
→ 0P0000ZWFS
→ 0P0000ZWFR
→ HGHCX
→ HBGHX
→ HGHYX
Fetching: MF Losers
→ MASFX
→ WAGTX
→ WIGTX
→ WASIX
→ JSFRX
→ JSFBX
→ JSFTX
→ 0P00011WBK
→ JSFDX
→ TGJNX
→ BGAEX
→ 0P0001T9Z1
→ 0P00011WC1
→ BGPTX
→ 0P00018TL9
→ 0P00018TLA
→ 0P00017HXX
→ 0P00001BHA
→ INKDX
→ DPTRX
→ 0P00016OCN
→ 0P00000D6E
→ 0P000160QS
→ 0P0001U8ZQ
→ 0P0000NC9G
→ 0P0000NCNP
→ 0P00015T0S
→ 0P00001NBB
→ 0P0001T7NZ
→ 0P00015T0P
→ 0P00015T0V
→ 0P0001Q6LW
→ PHSBX
→ 0P0001NI1H
→ 0P0001PGDG
→ 0P0001NI1I
→ 0P0001NI1J
→ 0P0001PGDH
→ 0P0001NI1K
→ 0P0000O32Z
→ 0P0001FD9J
→ FGIZX
→ FGITX
→ FGIUX
→ FGIOX
→ FGIRX
→ 0P0001RIJ8
→ 0P0001RMFV
→ 0P0001RIJ9
→ 0P0001RIR7
→ 0P0001QRHK
→ 0P0001HSS2
→ RYTPX
→ RYCBX
→ URPIX
→ RYTMX
→ 0P0001LHDM
→ 0P0001HHPV
→ 0P0001HHPU
→ URPSX
→ 0P0001QRHP
→ 0P0001KF6A
→ WIFMX
→ 0P0000A2ZN
→ 0P0000A2ZP
→ 0P0000NCO9
→ QGLDX
→ QG

ERROR:yfinance:$GHACX: possibly delisted; no price data found  (period=5d)


→ GHACX
→ WAFMX
→ PPZAX
→ IEAOX
Fetching: MF Top Performing
→ 0P00000C23
→ 0P00000C29
→ 0P00000CAQ
→ 0P00000HSG
→ 0P00000HSM
→ 0P00000ICD
→ 0P00000IRU
→ 0P00000IS9
→ 0P00000ISG
→ 0P00000MNK
→ 0P00000NMO
→ 0P00000OB6
→ 0P00000OB8
→ 0P00000RLL
→ 0P00000RMH
→ 0P00000RMI
→ 0P00000RMJ
→ 0P00000RMP
→ 0P00000RMS
→ 0P00000RNC
→ 0P00000RNM
→ 0P00000RNN
→ 0P00000RNO
→ 0P00000ROA
→ 0P00000RPL
→ 0P00000RPM
→ 0P00000RQA
→ 0P00000RQB
→ 0P00000RQD
→ 0P00000T4W
→ 0P00000UJD
→ 0P00000VK6
→ 0P00000WAS
→ 0P00000WAT
→ 0P00000WBE
→ 0P00000WBF
→ 0P00000WBZ
→ 0P00000WUL
→ 0P00000X72
→ 0P00000XX2
→ 0P00000YAP
→ 0P00000YD6
→ 0P00000YD9
→ 0P00000YDC
→ 0P00000YDF
→ 0P00000ZE0
→ 0P00000ZE7
→ 0P00000ZF0
→ 0P000010HG
→ 0P000010PE
→ 0P000010PL
→ 0P000011NM
→ 0P000013EG
→ 0P000013GL
→ 0P000014FB
→ 0P000014FE
→ 0P000015HL
→ 0P000015K8
→ 0P0000187T
→ 0P0000187W
→ 0P000018HA
→ 0P00001BHA
→ 0P00001BIN
→ 0P00001BJE
→ 0P00001BQ3
→ 0P00001BQK
→ 0P00001DS4
→ 0P00001EVJ
→ 0P00001SNB
→ 0P00001SUL
→ 0P00001T1G
→ 0P00001T82
→ 0P

Unnamed: 0,Symbol,1W Change %,1M Change %,3M Change %,6M Change %,1Y Change %,5Y Change %,Name,Category
0,FTPAX,,,,,11.53,11.21,First Trust Private Assets Fund,MF Gainers
1,0P0001UN1A,,13.5,-0.94,-1.34,-1.34,-1.34,--,MF Gainers
2,0P0001A9KH,,16.58,2.62,-4.2,5.24,-9.06,Pentaris Bares US Equity F USD Dis,MF Gainers
3,0P0000A413,,11.75,0.54,-6.26,-2.23,63.89,Pembroke American Growth Inc,MF Gainers
4,0P0001J4GT,,11.25,-3.88,-2.24,5.07,25.45,EMC Asset Management EMC US Eq P Acc,MF Gainers


In [10]:
# STEP 1: Fix Chrome + Chromedriver compatibility
# NOTE(review): the scraping cell in this notebook (In [14]) uses requests +
# BeautifulSoup and is labelled "no Selenium"; this browser setup may no longer
# be needed. Confirm before removing.
# apt-get is used instead of apt because apt's command-line interface is not
# stable for scripted use.
!apt-get update -y
!apt-get install -y wget unzip
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg stops when Chrome's dependencies are missing; the fallback installs them
# and finishes configuring the package.
!dpkg -i chrome.deb || apt-get -fy install
!apt-get install -y chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb

# Install Python packages (%pip targets the running kernel's environment)
%pip install -q selenium yfinance webdriver-manager


[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
[33m0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.[0m[33m0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.[0m                                                                               Get:2 https://dl.google.com/linux/chrome/deb stable InRelease [1,825 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:7 https://dl.google.com/linux/chrome/deb stable/main amd64 Packages [1,211 B]
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,

In [7]:
# Install Chrome + ChromeDriver.
# NOTE(review): these steps repeat the browser install found in the other setup
# cells of this notebook; consider keeping a single copy.
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg stops when Chrome's dependencies are missing; the fallback installs them
# and finishes configuring the package.
!dpkg -i chrome.deb || apt-get -fy install
# apt-get rather than apt: apt's command-line interface is not stable for scripted use.
!apt-get install -y chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb


(Reading database ... (Reading database ... 5%(Reading database ... 10%(Reading database ... 15%(Reading database ... 20%(Reading database ... 25%(Reading database ... 30%(Reading database ... 35%(Reading database ... 40%(Reading database ... 45%(Reading database ... 50%(Reading database ... 55%(Reading database ... 60%(Reading database ... 65%(Reading database ... 70%(Reading database ... 75%(Reading database ... 80%(Reading database ... 85%(Reading database ... 90%(Reading database ... 95%(Reading database ... 100%(Reading database ... 126715 files and directories currently installed.)
Preparing to unpack chrome.deb ...
Unpacking google-chrome-stable (136.0.7103.113-1) over (136.0.7103.113-1) ...
Setting up google-chrome-stable (136.0.7103.113-1) ...
Processing triggers for mailcap (3.70+nmu1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
chromium-chromedri

In [2]:
# Install Chrome + ChromeDriver
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg stops when Chrome's dependencies (e.g. libvulkan1) are missing; the
# fallback installs them and finishes configuring the package.
!dpkg -i chrome.deb || apt-get -fy install
# apt-get rather than apt: apt's command-line interface is not stable for scripted use.
!apt-get install -y chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb

# Install Python packages (%pip targets the running kernel's environment)
%pip install selenium yfinance webdriver-manager --quiet


Selecting previously unselected package google-chrome-stable.
(Reading database ... 126102 files and directories currently installed.)
Preparing to unpack chrome.deb ...
Unpacking google-chrome-stable (136.0.7103.113-1) ...
[1mdpkg:[0m dependency problems prevent configuration of google-chrome-stable:
 google-chrome-stable depends on libvulkan1; however:
  Package libvulkan1 is not installed.

[1mdpkg:[0m error processing package google-chrome-stable (--install):
 dependency problems - leaving unconfigured
Processing triggers for mailcap (3.70+nmu1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...
Errors were encountered while processing:
 google-chrome-stable
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Correcting dependencies... Done
The following additional packages will be installed:
  libvulkan1 mesa-vulkan-drivers
The following NEW packages will be installed:
  libvulkan1 mesa-vulkan-drivers
0 upgraded, 2 newly insta