In [13]:
# Final script: Scrape top stocks per category (no Selenium), compute flows with names

import requests
from bs4 import BeautifulSoup
import pandas as pd
import yfinance as yf
import time

# Yahoo Finance listing pages to scrape, keyed by category label.
# The label is printed while fetching and stored in each output row's "Category"
# field; "Top Gainers" and "Top Losers" are fetched through the paginated scraper,
# the other categories through the simple single-request scraper.
category_urls = {
    "Top Gainers": "https://finance.yahoo.com/markets/stocks/gainers",
    "Most Active": "https://finance.yahoo.com/most-active",
    "Trending Now": "https://finance.yahoo.com/trending-tickers",
    "Top Losers": "https://finance.yahoo.com/markets/stocks/losers"
}

# HTTP headers passed to every requests.get() call made by the scrapers.
# NOTE(review): presumably a browser-like User-Agent is needed for Yahoo to serve
# the table markup — confirm against the live site.
headers = {
    "User-Agent": "Mozilla/5.0"
}

# Scrape up to 100 stock symbols and names from a paginated Yahoo Finance category

def _parse_symbol_rows(html, limit):
    """Extract up to ``limit`` ``(symbol, name)`` tuples from a listing page.

    Rows are selected with ``table tbody tr``; the stripped text of the first
    two ``<td>`` cells of a row is taken as the symbol and the name. Rows with
    fewer than two cells are skipped.
    """
    pairs = []
    if limit <= 0:
        return pairs
    soup = BeautifulSoup(html, "html.parser")
    for row in soup.select("table tbody tr"):
        cols = row.find_all("td")
        if len(cols) >= 2:
            pairs.append((cols[0].text.strip(), cols[1].text.strip()))
            if len(pairs) >= limit:
                break
    return pairs


def get_symbols_paginated(url, pages=1, max_results=100, timeout=10):
    """Scrape ``(symbol, name)`` pairs from a paginated listing URL.

    Each page is requested with ``start``/``count`` query parameters in steps
    of 100 rows. Fetching stops once ``max_results`` pairs have been collected
    or ``pages`` pages have been requested.

    Args:
        url: Base listing URL.
        pages: Maximum number of pages to request.
        max_results: Upper bound on the number of pairs returned.
        timeout: Seconds to wait for each HTTP response; without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        List of ``(symbol, name)`` tuples in page order.

    Raises:
        requests.RequestException: propagated on connection errors or timeouts.
    """
    page_size = 100
    results = []
    for page in range(pages):
        remaining = max_results - len(results)
        if remaining <= 0:
            break
        # Let requests build the query string so it stays valid even if `url`
        # already carries query parameters.
        response = requests.get(
            url,
            params={"start": page * page_size, "count": page_size},
            headers=headers,
            timeout=timeout,
        )
        results.extend(_parse_symbol_rows(response.text, remaining))
    return results

# Fallback for single-page non-paginated lists like Most Active or Trending Now

def get_symbols_simple(url, max_results=100, timeout=10):
    """Scrape ``(symbol, name)`` pairs from a single, non-paginated listing URL.

    Args:
        url: Listing URL, requested as-is.
        max_results: Upper bound on the number of pairs returned.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        List of ``(symbol, name)`` tuples in table order.

    Raises:
        requests.RequestException: propagated on connection errors or timeouts.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    return _parse_symbol_rows(response.text, max_results)

# Get percent change from yfinance
def get_change_pct(ticker, period):
    """Return the percent change of the closing price over ``period``.

    The change is measured from the first to the last available close in the
    history returned by yfinance for ``period`` and rounded to two decimals.

    Args:
        ticker: Ticker symbol passed to ``yf.Ticker``.
        period: yfinance period string (the callers in this notebook pass
            values such as ``"5d"``, ``"1mo"``, ``"1y"``).

    Returns:
        The rounded percent change, or ``None`` when the history cannot be
        fetched, has fewer than two valid closes, or starts at a zero price.
    """
    try:
        hist = yf.Ticker(ticker).history(period=period)
    except Exception:
        # Best-effort lookup: one failing ticker must not abort the whole run.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        return None

    if hist.empty or "Close" not in hist:
        return None

    # Ignore rows without a close so a NaN edge row cannot turn the result into NaN.
    closes = hist["Close"].dropna()
    if len(closes) < 2:
        return None

    first_close = closes.iloc[0]
    last_close = closes.iloc[-1]
    if first_close == 0:
        # A zero base price would produce an infinite percentage.
        return None
    return round(((last_close - first_close) / first_close) * 100, 2)

# Get all change periods
def get_all_changes(symbol):
    """Build one result row holding the percent change of ``symbol`` per horizon.

    Returns a dict whose first key is ``"Symbol"`` followed by one
    ``"<horizon> Change %"`` entry per look-back period, each computed by
    ``get_change_pct`` with the matching period string.
    """
    # (column label, period string handed to get_change_pct), shortest first
    horizons = (
        ("1W Change %", "5d"),
        ("1M Change %", "1mo"),
        ("3M Change %", "3mo"),
        ("6M Change %", "6mo"),
        ("1Y Change %", "1y"),
        ("5Y Change %", "5y"),
    )
    row = {"Symbol": symbol}
    for label, period in horizons:
        row[label] = get_change_pct(symbol, period)
    return row

# Process all categories
all_data = []
# symbol -> change metrics already computed in this run. The same ticker often
# shows up in several categories; caching avoids repeating its six history
# downloads and the rate-limit pause.
changes_by_symbol = {}

for category, url in category_urls.items():
    print(f"Fetching: {category}")
    if category in ("Top Gainers", "Top Losers"):
        symbol_name_pairs = get_symbols_paginated(url, pages=1)
    else:
        symbol_name_pairs = get_symbols_simple(url)

    if not symbol_name_pairs:
        # An empty result usually means the page layout changed or the request
        # was blocked; surface it instead of silently producing no rows.
        print(f"No symbols found for: {category}")

    for symbol, name in symbol_name_pairs:
        print(f"→ {symbol}")
        if symbol not in changes_by_symbol:
            changes_by_symbol[symbol] = get_all_changes(symbol)
            time.sleep(1)  # avoid hitting rate limit (only needed after real requests)
        # Copy the cached metrics so each category gets its own independent row.
        all_data.append({**changes_by_symbol[symbol], "Name": name, "Category": category})

# Save and show
df = pd.DataFrame(all_data)
df.to_csv("market_category_flows.csv", index=False)
print("✅ Saved as market_category_flows.csv")
df.head()

Fetching: Top Gainers
→ SOC
→ ZI
→ ZIM
→ BGC
→ TXNM
→ UNH
→ TEO
→ BBAR
→ BMA
→ BAP
→ MRNA
→ TGS
→ CEPU
→ PAM
→ RYAAY
→ BNTX
→ DG
→ GGAL
→ ZLAB
→ KEP
→ QBTS
→ YPF
→ FINV
→ HSAI
→ VIST
→ HUM
→ VEON
→ TAL
→ GRFS
→ NVO
→ CHWY
→ EQX
→ ORLA
→ INTR
→ LEGN
→ QXO
→ MSTR
→ EGO
→ TTWO
→ MLTX
→ GFI
→ GEV
Fetching: Most Active
→ NVDA
→ LCID
→ QBTS
→ TSLA
→ PLTR
→ RGTI
→ UNH
→ ACHR
→ DFS
→ SOFI
→ SMCI
→ F
→ AAL
→ AUR
→ INTC
→ AAPL
→ LYG
→ QS
→ SOUN
→ HIMS
→ RIVN
→ MARA
→ ITUB
→ BTG
→ BAC
Fetching: Trending Now
→ PTIX
→ NVAX
→ ZIM
→ QMCO
→ SOC
→ PLTR
→ QS
→ FAAS
→ TMC
→ MEHCQ
→ SYTA
→ BCLI
→ BGC
→ GS
→ LAES
→ DG
→ MULN
→ SOUN
→ ASTS
→ GEV
→ QBTS
→ ARQQ
→ REGN
→ TXNM
→ MSFT
Fetching: Top Losers
→ ACHR
→ RGC
→ RUN
→ ASTS
→ FSLR
→ SOUN
→ PONY
→ AUR
→ BE
→ JOBY
→ IESC
→ SANM
→ TEM
→ ENLT
→ CELH
→ CQP
→ NWL
→ CAVA
→ U
→ SATS
→ BILI
→ GTLB
→ SIG
→ AI
→ RDDT
→ HIMS
→ AES
→ CIVI
→ VKTX
→ XPEV
→ OSCR
→ RUM
→ DAR
→ ENPH
→ RH
→ UPWK
→ TNXP
→ AEO
→ QDEL
→ OLED
→ NXT
→ INSP
→ ACHC
→ CLF
→ ZETA
→ SITM
→ IGT
→ DBRG

Unnamed: 0,Symbol,1W Change %,1M Change %,3M Change %,6M Change %,1Y Change %,5Y Change %,Name,Category
0,SOC,21.35,75.7,11.17,57.06,150.18,251.03,Sable Offshore Corp.,Top Gainers
1,ZI,1.69,32.99,0.29,3.03,-23.42,-70.0,ZoomInfo Technologies Inc.,Top Gainers
2,ZIM,15.58,52.7,17.39,7.65,59.66,552.39,ZIM Integrated Shipping Services Ltd.,Top Gainers
3,BGC,6.16,20.04,3.71,-3.91,11.97,296.35,"BGC Group, Inc.",Top Gainers
4,TXNM,6.8,11.17,12.35,20.64,52.63,67.3,"TXNM Energy, Inc.",Top Gainers


In [10]:
# STEP 1: Fix Chrome + Chromedriver compatibility
# apt-get is used throughout (matching the dpkg fallback below); the `apt`
# front end is meant for interactive use and warns when scripted.
!apt-get update -y
!apt-get install -y wget unzip
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg -i fails when Chrome's dependencies are missing; apt-get -f then installs them.
!dpkg -i chrome.deb || apt-get -fy install
!apt-get install -y chromium-chromedriver
# NOTE(review): assumes the package installs chromedriver at this path — confirm on the target image.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb

# Install Python packages (%pip installs into the running kernel's environment)
%pip install -q selenium yfinance webdriver-manager


[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
[33m0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.[0m[33m0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.[0m                                                                               Get:2 https://dl.google.com/linux/chrome/deb stable InRelease [1,825 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:7 https://dl.google.com/linux/chrome/deb stable/main amd64 Packages [1,211 B]
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,

In [7]:
# Download and install Chrome plus chromedriver.
# These commands repeat the download/install steps of the "STEP 1" setup cell
# (without its apt update and pip install steps).
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg -i fails when Chrome's dependencies are missing; the apt-get -f fallback installs them.
!dpkg -i chrome.deb || apt-get -fy install
!apt install -y chromium-chromedriver
# NOTE(review): assumes the package installs chromedriver at this path — confirm on the target image.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb


(Reading database ... (Reading database ... 5%(Reading database ... 10%(Reading database ... 15%(Reading database ... 20%(Reading database ... 25%(Reading database ... 30%(Reading database ... 35%(Reading database ... 40%(Reading database ... 45%(Reading database ... 50%(Reading database ... 55%(Reading database ... 60%(Reading database ... 65%(Reading database ... 70%(Reading database ... 75%(Reading database ... 80%(Reading database ... 85%(Reading database ... 90%(Reading database ... 95%(Reading database ... 100%(Reading database ... 126715 files and directories currently installed.)
Preparing to unpack chrome.deb ...
Unpacking google-chrome-stable (136.0.7103.113-1) over (136.0.7103.113-1) ...
Setting up google-chrome-stable (136.0.7103.113-1) ...
Processing triggers for mailcap (3.70+nmu1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
chromium-chromedri

In [2]:
# Install Chrome + ChromeDriver
!wget -q -O chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg -i fails when Chrome's dependencies (e.g. libvulkan1) are missing;
# apt-get -f then installs them and finishes the configuration.
!dpkg -i chrome.deb || apt-get -fy install
# apt-get rather than apt: consistent with the fallback above and suited to scripted use.
!apt-get install -y chromium-chromedriver
# NOTE(review): assumes the package installs chromedriver at this path — confirm on the target image.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!rm chrome.deb

# Install Python packages (%pip installs into the running kernel's environment)
%pip install selenium yfinance webdriver-manager --quiet


Selecting previously unselected package google-chrome-stable.
(Reading database ... 126102 files and directories currently installed.)
Preparing to unpack chrome.deb ...
Unpacking google-chrome-stable (136.0.7103.113-1) ...
[1mdpkg:[0m dependency problems prevent configuration of google-chrome-stable:
 google-chrome-stable depends on libvulkan1; however:
  Package libvulkan1 is not installed.

[1mdpkg:[0m error processing package google-chrome-stable (--install):
 dependency problems - leaving unconfigured
Processing triggers for mailcap (3.70+nmu1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...
Errors were encountered while processing:
 google-chrome-stable
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Correcting dependencies... Done
The following additional packages will be installed:
  libvulkan1 mesa-vulkan-drivers
The following NEW packages will be installed:
  libvulkan1 mesa-vulkan-drivers
0 upgraded, 2 newly insta