In [1]:
### This notebook scrapes stock ticker symbols from the Finviz screener, with the screening filters set to:
### country USA, average volume over 1 million, price over 1 USD, weekly volatility over 5%, ATR > 0.75

In [27]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_finviz_stocks(timeout=10):
    """Scrape ticker symbols from every page of the filtered Finviz screener.

    The first page is fetched once: it is used both to read the page count
    (via ``get_total_pages``) and as the source of page 1's tickers. Each
    remaining page is requested with an ``r=`` row-offset query parameter.

    ``get_total_pages`` must already be defined in the kernel when this
    function is called.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A list of ticker strings in page order. An empty list is returned
        when the first page cannot be retrieved; a later page that fails is
        reported with ``print`` and skipped.
    """
    # This URL carries the screener filters in its `f=` query parameter.
    base_url = "https://finviz.com/screener.ashx?v=111&f=geo_usa,sh_avgvol_o1000,sh_price_o1,ta_averagetruerange_o0.75,ta_volatility_wo5&ft=3"

    # Send a browser-like User-Agent so the request is not blocked as a script.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # The `r=` row offset advances by this many rows per page.
    # NOTE(review): presumably the screener's fixed page size - confirm.
    rows_per_page = 20

    def fetch_soup(session, url):
        """Return the parsed page at ``url``, or None after printing the failure."""
        try:
            response = session.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Treat network errors like a bad status code: report and carry on.
            print(f"Failed to retrieve data: {exc}")
            return None
        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            return None
        return BeautifulSoup(response.text, 'html.parser')

    stocks = []

    # One session for all pages so the underlying connection can be reused.
    with requests.Session() as session:
        first_soup = fetch_soup(session, base_url)
        if first_soup is None:
            return []

        total_pages = get_total_pages(first_soup)

        for page in range(1, total_pages + 1):
            print(f"Scraping page {page} of {total_pages}")

            if page == 1:
                # Already downloaded above; do not request it a second time.
                soup = first_soup
            else:
                soup = fetch_soup(session, f"{base_url}&r={(page - 1) * rows_per_page + 1}")
                if soup is None:
                    continue

            # The ticker is the text of the `tab-link` anchor inside each matching cell.
            for cell in soup.find_all('td', align='left', height='10'):
                link = cell.find('a', class_='tab-link')
                if link:
                    stocks.append(link.text.strip())

    return stocks

# Run the scraper; `stocks` holds the ticker symbols it returned.
stocks = scrape_finviz_stocks()

if not stocks:
    print("No stocks found.")
else:
    # Persist the symbols as a one-column CSV, then echo the list.
    stocks_df = pd.DataFrame({'Stock Symbol': stocks})
    stocks_df.to_csv('finviz_stocks.csv', index=False)
    print(stocks)

Scraping page 1 of 5
Scraping page 2 of 5
Scraping page 3 of 5
Scraping page 4 of 5
Scraping page 5 of 5
['AAOI', 'AGQ', 'AKRO', 'ALAB', 'AMC', 'APLS', 'ARQT', 'ARRY', 'AWIN', 'BE', 'BHVN', 'BITX', 'BOIL', 'BYON', 'CABA', 'CADL', 'CENX', 'CLSK', 'COIN', 'CONL', 'DELL', 'DJT', 'DPST', 'DXYZ', 'DYN', 'EDBL', 'ELF', 'ENPH', 'ENVX', 'EXAS', 'FLNC', 'FSLR', 'FWRD', 'GCT', 'GDXU', 'GH', 'GME', 'GTBP', 'HIMS', 'HOOD', 'IBRX', 'INSM', 'IRBT', 'JAGX', 'JANX', 'KOLD', 'LABU', 'LEGN', 'MARA', 'MGNI', 'MGNX', 'MRNA', 'MSTR', 'MULN', 'NRIX', 'NVAX', 'NVDL', 'NXT', 'OKLO', 'OSCR', 'OUST', 'PALI', 'PGY', 'PLCE', 'RDDT', 'RENT', 'RILY', 'RIOT', 'RNA', 'RUN', 'SAGE', 'SG', 'SGBX', 'SMCI', 'SMR', 'SOXL', 'SOXS', 'SPT', 'STOK', 'TNDM', 'TSLT', 'TSLZ', 'UNG', 'VSAT', 'VST', 'WISA', 'WOLF', 'WSM']


In [24]:

def get_total_pages(soup):
    """Returns the total number of pages from the Finviz screener.

    Looks at every ``<a class="screener-pages">`` pagination link and returns
    the largest label that parses as an integer. Non-numeric links (for
    example a "next" link) are ignored, so the result does not depend on how
    many such links surround the page numbers.

    Args:
        soup: Parsed page exposing ``find_all`` (a BeautifulSoup object).

    Returns:
        The highest page number found, or 1 when there are no numeric
        pagination links.
    """
    page_numbers = []
    for link in soup.find_all('a', class_='screener-pages'):
        try:
            page_numbers.append(int(link.text.strip()))
        except ValueError:
            # Not a page number; skip it.
            continue
    return max(page_numbers, default=1)