In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 3500) #allows 3500 rows to be displayed
pd.options.mode.chained_assignment = None #supresses 'set with copy' warnings

In [3]:
# run mode 'spy' analyzes all companies from the S&P 500
# run mode 'wilshire' analyzes all companies from the Wilshire 5000
#run_mode = 'spy'
run_mode = 'wilshire'

In [11]:
# Get our set of companies to screen
if run_mode == 'spy':
    # S&P 500 companies are extracted from wikipedia
    table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    df = table[0]
    df = df.rename(columns = {'Symbol': 'Ticker', 
                              'Security': 'Company'})
    df = df [['Ticker', 'Company', 'GICS Sector', 'GICS Sub-Industry']]
elif run_mode == 'wilshire':
    # Wilshire 5000 tickers are taken from a local file
    df = pd.read_excel('wilshire_5000_stocks.xlsx', header=3)
    df['Ticker'] = df['AAC']
else:
    print(f"ERROR: used invalid run mode: '{run_mode}'")

df

Unnamed: 0,AAC,0,N/A,10.17,0.1,N/A.1,N/A.2,0.2,1017,0.3,N/A.3,0.010859872745737,10.19,9.7,Ticker
0,AAL,0,Industrials,16.69,0.0,,,0.0,10846.85,,0.0,1.567551,21.42,11.65,AAL
1,AAME,0,Financial Services,2.48,0.008065,,,0.02,50.51073,0.0,,,3.77,2.1,AAME
2,AAN,0,Industrials,14.89,0.029828,,,0.444137,458.3213,0.0,,1.231646,23.04,7.58,AAN
3,AAOI,0,Technology,2.6,0.0,,,0.0,74.41861,,0.0,1.330614,4.56,1.48,AAOI
4,AAON,0,Industrials,77.76,0.00553,,,0.43,4136.796,61.137326,0.34127,0.850934,83.0,47.33,AAON
5,AAP,0,Consumer Cyclical,156.84,0.038256,0.5,0.903654,5.913952,9428.945,17.601987,0.687669,0.844475,227.79,138.52,AAP
6,AAPL,0,Technology,145.43,0.006326,-0.275217,-0.255744,0.906784,2303060.0,23.076064,0.14841,1.294022,178.58,124.17,AAPL
7,AAT,0,Real Estate,28.97,0.044184,0.066667,0.034564,1.258326,1753.324,41.731889,2.28247,0.710629,37.82,23.72,AAT
8,AAWW,0,Industrials,102.21,0.0,,,0.0,2899.105,7.129975,0.0,0.55149,102.27,58.7,AAWW
9,ABBV,0,Healthcare,146.6,0.040382,0.049645,0.090431,5.629217,259259.2,19.4289,0.749563,0.363173,169.46,131.49,ABBV


In [None]:
# Filter out Financials and Utilities
if run_mode == 'spy':
    def sector_filter(sector):
        if sector in ('Financials', 'Utilities'):
            return False
        else:
            return True

    initial_row_count = df.shape[0]
    df = df[df['GICS Sector'].apply(sector_filter)]
    new_row_count = df.shape[0]
    print(f'''Began with {initial_row_count} rows, {new_row_count} remain after filtering out companies from the financials and utilities sectors.''')

In [9]:
yf.Ticker("AAPL").info

{'zip': '95014', 'sector': 'Technology', 'fullTimeEmployees': 164000, 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. In addition, the company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. Further, it provides AppleCare support and cloud services store services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. Additionally, the company offers various services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experience with on-demand radio 

In [12]:
import yfinance as yf # Yahoo Finance API
from time import sleep
from joblib import Parallel, delayed # parallelizes API-calls
from tqdm import tqdm # provides progress bar

def get_ticker_info(ticker):
    # Calls Yahoo Finance API to get financial data for a given ticker
    # Includes 3x retry logic to deal with unreliable API behavior
    attempts = 0
    while attempts < 3:
        try:
            return yf.Ticker(ticker).info
        except Exception as err:
            attempts = attempts + 1
            print(f"WARNING: API call failed for ticker {ticker}... with error \n{err}\n Trying again in 3 seconds.")
            sleep(3)
    print(f"WARNING: Could not get API response for ticker {ticker} after 3 attempts... skipping!")
    return np.nan

# Executes get_ticker_info for all companies in our dataframe
# Parallelizes over n cores
n_cores = 4
ticker_info_array = Parallel(n_jobs=n_cores)(
    delayed(get_ticker_info)(t) 
    for t in tqdm(df['Ticker'].values, 
                  total=df.shape[0], 
                  position=0, 
                  leave=True))

100%|████████████████████████████████████████████████████████████████████████████| 3215/3215 [2:20:37<00:00,  2.62s/it]


In [16]:
ticker_info_array[0].keys()

dict_keys(['zip', 'sector', 'fullTimeEmployees', 'longBusinessSummary', 'city', 'phone', 'state', 'country', 'companyOfficers', 'website', 'maxAge', 'address1', 'industry', 'ebitdaMargins', 'profitMargins', 'grossMargins', 'operatingCashflow', 'revenueGrowth', 'operatingMargins', 'ebitda', 'targetLowPrice', 'recommendationKey', 'grossProfits', 'freeCashflow', 'targetMedianPrice', 'earningsGrowth', 'currentRatio', 'returnOnAssets', 'numberOfAnalystOpinions', 'targetMeanPrice', 'debtToEquity', 'returnOnEquity', 'targetHighPrice', 'totalCash', 'totalDebt', 'totalRevenue', 'totalCashPerShare', 'financialCurrency', 'revenuePerShare', 'quickRatio', 'recommendationMean', 'shortName', 'longName', 'isEsgPopulated', 'gmtOffSetMilliseconds', 'quoteType', 'messageBoardId', 'market', 'annualHoldingsTurnover', 'enterpriseToRevenue', 'beta3Year', 'enterpriseToEbitda', '52WeekChange', 'morningStarRiskRating', 'forwardEps', 'revenueQuarterlyGrowth', 'sharesOutstanding', 'fundInceptionDate', 'annualRepo

In [17]:
# Extract financial metrics of interest
def get_symbol(ticker_info):
    # Extracts sector from ticker info
    if 'shortName' in ticker_info:
        return ticker_info['shortName']
    return np.nan

def get_return_on_assets(ticker_info):
    # Extracts return-on-assets metric from ticker info
    if 'returnOnAssets' in ticker_info:
        return ticker_info['returnOnAssets']
    return np.nan

def get_price_to_earnings(ticker_info):
    # Extracts price-to-earnings ratio from ticker info
    if 'trailingPE' in ticker_info:
        return ticker_info['trailingPE']
    if 'forwardPE' in ticker_info:
        return ticker_info['forwardPE']
    return np.nan

def get_sector(ticker_info):
    # Extracts sector from ticker info
    if 'sector' in ticker_info:
        return ticker_info['sector']
    return np.nan

financials = pd.DataFrame(
    [(get_symbol(ticker_info), 
      get_return_on_assets(ticker_info),
      get_price_to_earnings(ticker_info),
      get_sector(ticker_info))
     for ticker_info in ticker_info_array if ticker_info],
    columns = ['Ticker', 'ROA', 'PE', 'Sector'])

financials

Unnamed: 0,Ticker,ROA,PE,Sector
0,"American Airlines Group, Inc.",0.0172,85.299995,Industrials
1,Atlantic American Corporation,0.00942,18.142857,Financial Services
2,"Aarons Holdings Company, Inc.",0.06023,29.092592,Industrials
3,"Applied Optoelectronics, Inc.",-0.07735,-3.662338,Technology
4,"AAON, Inc.",0.08286,63.944,Industrials
5,Advance Auto Parts Inc.,0.03815,19.902565,Consumer Cyclical
6,Apple Inc.,0.21214,24.684126,Technology
7,"American Assets Trust, Inc.",0.02339,42.985508,Real Estate
8,Atlas Air Worldwide Holdings,0.06181,8.20626,Industrials
9,AbbVie Inc.,0.09267,19.312,Healthcare


In [18]:
# Combine original dataframe with financial metrics
df = df.merge(financials, on='Ticker')
df

Unnamed: 0,AAC,0,N/A,10.17,0.1,N/A.1,N/A.2,0.2,1017,0.3,N/A.3,0.010859872745737,10.19,9.7,Ticker,ROA,PE,Sector
0,RH,0,Consumer Cyclical,323.99,0.0,,,0.0,7759.946372,10.509578,0.0,1.678503,441.67,207.37,RH,0.13081,34.844376,Consumer Cyclical


In [20]:
df = financials.copy(deep=True)
df['PE'] = df['PE'].astype('float64')
df = df[(df['PE'].notnull()) &
        (df['ROA'].notnull()) &
        (df['PE'] > 0) &
        (df['ROA'] > 0)]
df['PE'] = pd.to_numeric(df['PE'])
df['ROA'] = pd.to_numeric(df['ROA'])
df

Unnamed: 0,Ticker,ROA,PE,Sector
0,"American Airlines Group, Inc.",0.0172,85.299995,Industrials
1,Atlantic American Corporation,0.00942,18.142857,Financial Services
2,"Aarons Holdings Company, Inc.",0.06023,29.092592,Industrials
4,"AAON, Inc.",0.08286,63.944,Industrials
5,Advance Auto Parts Inc.,0.03815,19.902565,Consumer Cyclical
6,Apple Inc.,0.21214,24.684126,Technology
7,"American Assets Trust, Inc.",0.02339,42.985508,Real Estate
8,Atlas Air Worldwide Holdings,0.06181,8.20626,Industrials
9,AbbVie Inc.,0.09267,19.312,Healthcare
10,AmerisourceBergen Corporation,0.02988,19.231796,Healthcare


In [None]:
df['PE'] = df['PE'].astype('float64')
df = df[(df['PE'].notnull()) &
        (df['ROA'].notnull()) &
        (df['PE'] > 0) &
        (df['ROA'] > 0)]
df['PE'] = pd.to_numeric(df['PE'])
df['ROA'] = pd.to_numeric(df['ROA'])

In [21]:
# Final score is based on relative rankings for price-to-earnings and return-on-assets metrics (equally weighted)
# Companies with a relatively low price-to-earnings and high return-on-assets will come to the top
df['ROA_rank'] = df['ROA'].rank(ascending=False)
df['PE_rank'] = df['PE'].rank(ascending=True) 
df['Score'] = (df['ROA_rank'] + df['PE_rank']).rank(ascending=True)
df = df.sort_values('Score')

In [22]:
df.iloc[0:100]

Unnamed: 0,Ticker,ROA,PE,Sector,ROA_rank,PE_rank,Score
2058,"ProPhase Labs, Inc.",0.26369,2.676568,Healthcare,28.0,29.0,1.0
830,"VAALCO Energy, Inc.",0.24469,3.402985,Energy,33.0,37.0,2.0
801,GrafTech International Ltd.,0.26636,4.067901,Industrials,27.0,54.0,3.0
2302,SIGA Technologies Inc.,0.50962,5.337931,Healthcare,5.0,83.0,4.0
2330,SM Energy Company,0.19966,3.076555,Energy,72.0,32.0,5.0
1327,"Innoviva, Inc.",0.23528,5.709251,Healthcare,40.0,91.0,6.0
1541,"Laredo Petroleum, Inc.",0.15633,2.599459,Energy,115.0,27.0,7.0
1112,Alphabet Inc.,0.14927,1.435768,Communication Services,133.5,13.0,8.0
1111,Alphabet Inc.,0.14927,1.449893,Communication Services,133.5,14.0,9.0
2208,"Retractable Technologies, Inc.",0.14921,2.4625,Healthcare,135.0,23.0,10.0


In [None]:
df.sort_values("PE")

In [None]:
df[df['Ticker'] == 'HPQ']

In [None]:
df.to_csv('yfinance.csv')