## Purpose of notebook

1. To explore how to use yfinance potentially to get data for fundamental analysis
2. Also to explore for model training (the time series forecasting models)

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

### Export S&P 500 data

In [None]:
# get S&P 500 data and export to csv
data = yf.Ticker("^SPX")
prices = data.history(period='max', interval='1d')
prices.index = pd.to_datetime(prices.index, format='%Y-%m-%d')
prices.index = prices.index.strftime('%Y-%m-%d')

In [12]:
prices

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000,0,0.0,0.0
1928-01-03,17.760000,17.760000,17.760000,17.760000,0,0.0,0.0
1928-01-04,17.719999,17.719999,17.719999,17.719999,0,0.0,0.0
1928-01-05,17.549999,17.549999,17.549999,17.549999,0,0.0,0.0
1928-01-06,17.660000,17.660000,17.660000,17.660000,0,0.0,0.0
...,...,...,...,...,...,...,...
2025-04-28,5529.220215,5553.660156,5468.640137,5528.750000,4257880000,0.0,0.0
2025-04-29,5508.870117,5571.950195,5505.700195,5560.830078,4747150000,0.0,0.0
2025-04-30,5499.439941,5581.839844,5433.240234,5569.060059,5449490000,0.0,0.0
2025-05-01,5625.140137,5658.910156,5597.350098,5604.140137,4935270000,0.0,0.0


In [13]:
prices.to_csv('../../data/sp500.csv')

In [14]:
# load list of companies in S&P 500
def load_data(url):
    html = pd.read_html(url, header=0)
    return html

url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
df = load_data(url)[0]

In [16]:
# export df as a csv file as well so that we can perform clustering analysis (using the industrial and sub industry data)
df.to_csv('../../data/sp500_companies_sector.csv')

In [17]:
companies = df['Symbol'].tolist()
# replace fullstops with dashes instead
companies = [c.replace('.', '-') for c in companies]

## Get the price data of all stocks in S&P 500

In [19]:
data = yf.Tickers(companies)

In [None]:
all_data = data.history(period='10y')

[*********************100%***********************]  503 of 503 completed


In [29]:
all_data

Price,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-05-04,38.636948,28.742270,42.641659,,38.864830,19.534626,80.176292,75.849998,51.467667,37.660133,...,474366,5075000,4627000,3386200,10343700,920200,14124910,1336631,289700,4782900
2015-05-05,38.314510,28.094624,42.101055,,38.740784,19.480742,79.505646,75.000000,50.263374,37.682594,...,373234,3103800,2304500,3824900,11895000,946900,6820629,2179686,369200,7681200
2015-05-06,38.314510,27.918198,42.153805,,38.484444,19.559982,78.953804,74.480003,50.157574,37.375744,...,289354,4116100,2542300,2701800,11273500,774600,4096773,1976982,309500,4441400
2015-05-07,38.507973,28.090872,42.654850,,38.451374,19.791368,80.218742,74.730003,50.483074,37.660133,...,288448,4486400,1598600,3473100,9992500,636400,4677516,1718349,353400,2427000
2015-05-08,39.152843,28.620131,43.010845,,39.336151,19.864269,81.407333,76.309998,51.036400,38.356174,...,176330,4471000,2746500,2629300,11390300,1238200,6767632,1282041,372100,2548200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-28,106.870003,210.139999,192.339996,123.300003,129.529999,91.190002,293.250000,368.619995,193.720001,48.049999,...,804300,7708600,1443300,2994500,13890900,2427200,2218800,1299600,1094500,1873400
2025-04-29,107.459999,211.210007,193.509995,125.489998,130.500000,92.389999,298.470001,370.980011,192.669998,47.779999,...,586500,5244800,1463600,2539300,9483100,2864900,2339600,925700,1599800,2065100
2025-04-30,107.599998,212.500000,195.100006,121.919998,130.750000,90.680000,299.149994,374.980011,194.919998,47.750000,...,710200,5015900,1922700,4468300,20379300,2701500,3349500,1399600,762600,2926200
2025-05-01,106.459999,213.320007,193.339996,124.010002,130.880005,89.669998,300.529999,374.630005,193.740005,47.580002,...,600300,3875800,1772000,3389100,18610300,1829900,2734600,1400700,507800,1930300


In [28]:
all_data.to_csv('../../data/sp500_all_companies.csv')

## Get fundamental data for a specific company

In [2]:
data = yf.Ticker("AAPL")

In [None]:
bs = data.balance_sheet.transpose()
bs.index = bs.index.strftime('%Y-%m-%d')
bs = bs.to_dict()

income_statement = data.income_stmt.transpose()
income_statement.index = income_statement.index.strftime('%Y-%m-%d')
income_statement = income_statement.to_dict()

diluted_eps = income_statement['Diluted EPS']
book_value = bs['Stockholders Equity']
shares_outstanding = bs['Ordinary Shares Number']

# convert into format we can use
graham_numbers = {}

for date, eps, book_value, shares_outstanding in zip(diluted_eps.keys(), diluted_eps.values(), book_value.values(), shares_outstanding.values()):
    if np.isnan(eps) or np.isnan(book_value) or np.isnan(shares_outstanding):
        continue
    graham_numbers[date] = {}
    book_value_ps = book_value / shares_outstanding
    graham_numbers[date] = (22.5 * eps * book_value_ps) ** 0.5

graham_numbers

# book_value_ps = book_value / shares_outstanding
# (22.5 * diluted_eps * book_value_ps) ** 0.5

{'2024-09-30': 22.701794098299054,
 '2023-09-30': 23.478050809567907,
 '2022-09-30': 20.90282937968418,
 '2021-09-30': 22.017938467373487}

In [50]:
# calculate roe
bs = data.balance_sheet.transpose()
bs.index = bs.index.strftime('%Y-%m-%d')
bs = bs.to_dict()

income_statement = data.income_stmt.transpose()
income_statement.index = income_statement.index.strftime('%Y-%m-%d')
income_statement = income_statement.to_dict()

net_income = income_statement['Net Income']
shareholders = bs['Stockholders Equity']

roe = {}
for date, net_income, shareholders in zip(net_income.keys(), net_income.values(), shareholders.values()):
    if np.isnan(net_income) or np.isnan(shareholders):
        continue
    roe[date] = {}
    roe[date] = net_income / shareholders
roe

{'2024-09-30': 1.6459350307287095,
 '2023-09-30': 1.5607601454639075,
 '2022-09-30': 1.9695887275023682,
 '2021-09-30': 1.5007132667617689}

In [59]:
# calculate P/E ratio
data.info

{'address1': 'One Apple Park Way',
 'city': 'Cupertino',
 'state': 'CA',
 'zip': '95014',
 'country': 'United States',
 'phone': '(408) 996-1010',
 'website': 'https://www.apple.com',
 'industry': 'Consumer Electronics',
 'industryKey': 'consumer-electronics',
 'industryDisp': 'Consumer Electronics',
 'sector': 'Technology',
 'sectorKey': 'technology',
 'sectorDisp': 'Technology',
 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and p

In [10]:
d = data.income_stmt.to_dict(orient='index')
d['Diluted EPS'].values()

dict_values([6.08, 6.13, 6.11, 5.61, nan])

In [55]:
data.balance_sheet.index

Index(['Treasury Shares Number', 'Ordinary Shares Number', 'Share Issued',
       'Net Debt', 'Total Debt', 'Tangible Book Value', 'Invested Capital',
       'Working Capital', 'Net Tangible Assets', 'Capital Lease Obligations',
       'Common Stock Equity', 'Total Capitalization',
       'Total Equity Gross Minority Interest', 'Stockholders Equity',
       'Gains Losses Not Affecting Retained Earnings',
       'Other Equity Adjustments', 'Retained Earnings', 'Capital Stock',
       'Common Stock', 'Total Liabilities Net Minority Interest',
       'Total Non Current Liabilities Net Minority Interest',
       'Other Non Current Liabilities', 'Tradeand Other Payables Non Current',
       'Long Term Debt And Capital Lease Obligation',
       'Long Term Capital Lease Obligation', 'Long Term Debt',
       'Current Liabilities', 'Other Current Liabilities',
       'Current Deferred Liabilities', 'Current Deferred Revenue',
       'Current Debt And Capital Lease Obligation',
       'Current C

In [None]:
net_income = data.income_stmt.loc['Net Income']
equity = data.balance_sheet.loc['Stockholders Equity']

2024-09-30   -19154000000.0
2023-09-30     -214000000.0
2022-09-30    -3068000000.0
2021-09-30     5562000000.0
2020-09-30              NaN
Name: Retained Earnings, dtype: object

In [41]:
equity

2024-09-30    56950000000.0
2023-09-30    62146000000.0
2022-09-30    50672000000.0
2021-09-30    63090000000.0
2020-09-30              NaN
Name: Stockholders Equity, dtype: object

In [44]:
(net_income / equity).dropna()

2024-09-30    1.645935
2023-09-30     1.56076
2022-09-30    1.969589
2021-09-30    1.500713
dtype: object

In [82]:
# could maybe write a function to get news about the company, then get the LLM to link it in its response
news = data.news

news[1]['content']['clickThroughUrl']['url']

'https://finance.yahoo.com/news/tech-support-apples-macbook-air-is-100-cheaper-and-still-among-the-best-laptops-you-can-buy-143011864.html'

In [4]:
def get_balance_sheet(ticker: str) -> dict:
    '''
    Get the balance sheet of the company given a ticker symbol.
    
    Potentially useful fields include:
        - Stockholders Equity
        - Retained Earnings
        - Total Assets
        - Net PPE
        - Gross PPE
        - Common Stock Equity
        - Total Debt
        - Tangible Book Value
        - Invested Capital
        - Cash And Cash Equivalents
        - Total Debt
        - Common Stock Equity
        - Current Assets
        - Current Liabilities
        - Receivables
        - Total Liabilities Net Minority Interest
        - Capital Lease Obligations
        - Working Capital
        - Accounts Receivable
        - Inventory
        - Cash Cash Equivalents And Short Term Investments
        - Other Short Term Investments

    Args:
        ticker (str): The ticker symbol of the company

    Returns:
        dict: The balance sheet of the company
    '''
    
    stock = yf.Ticker(ticker)
    bs = stock.balance_sheet.transpose()
    bs.index = bs.index.strftime('%Y-%m-%d')

    return bs.to_dict()

def get_income_statement(ticker: str) -> dict:
    '''
    Get the income statement of the company given a ticker symbol.

    Potentially useful fields include:
        - Revenue
        - Cost Of Goods Sold
        - Gross Profit
        - Operating Income
        - Net Income
        - Earnings Before Interest And Taxes (EBIT)
        - Earnings Before Interest Taxes Depreciation And Amortization (EBITDA)
        - Earnings Before Interest And Taxes (EBIT)
        - Earnings Before Interest Taxes Depreciation And Amortization (EBITDA)

    Args:
        ticker (str): The ticker symbol of the company

    Returns:
        dict: The income statement of the company
    '''
    
    stock = yf.Ticker(ticker)
    income_statement = stock.income_stmt.transpose()
    income_statement.index = income_statement.index.strftime('%Y-%m-%d')

    return income_statement.to_dict()

def get_cash_flow(ticker: str) -> dict:
    '''
    Get the cash flow statement of the company given a ticker symbol.

    Potentially useful fields include:
        - Operating Cash Flow
        - Investing Cash Flow
        - Financing Cash Flow
        - Free Cash Flow

    Args:
        ticker (str): The ticker symbol of the company

    Returns:
        dict: The cash flow statement of the company
    '''
    stock = yf.Ticker(ticker)
    cash_flow = stock.cash_flow.transpose()
    cash_flow.index = cash_flow.index.strftime('%Y-%m-%d')

    return cash_flow.to_dict()


In [10]:
ticker = yf.Ticker('AAPL')

ticker.get_shares_outstanding()

AttributeError: 'Ticker' object has no attribute 'get_shares_outstanding'

In [None]:
import yfinance as yf
import pandas as pd

def get_pe_ratio(ticker: str) -> list:
    '''
    Get the P/E ratio of the company given a ticker symbol.

    Args:
        ticker (str): The ticker symbol of the company

    Returns:
        list: A list P/E ratios for each year
    '''
    
    income_statement = get_income_statement(ticker)
    eps = income_statement['Diluted EPS']

    stock = yf.Ticker(ticker)
    prices = stock.history(period='max')
    prices['year'] = prices.index.year
    year_last_prices = prices.groupby('year').last()['Close']

    pe_ratios = []
    for date, eps in eps.items():
        if np.isnan(eps):
            pe_ratios.append((date, 'Unknown'))
            continue
        year = pd.to_datetime(date).year
        if year in year_last_prices.index:
            pe_ratio = year_last_prices[year] / eps
            pe_ratios.append((date, round(pe_ratio, 2)))

    return pe_ratios


income_statement = get_income_statement('AAPL')
net_income = income_statement['Net Income']

# 3. Calculate P/E ratio for each year
# Merge price data with earnings data (using year-end prices)
stock = yf.Ticker("AAPL")
prices = stock.history(period='max')
prices['year'] = prices.index.year
year_last_prices = prices.groupby('year').last()['Close']

eps = income_statement['Diluted EPS']
pe_ratios = []

for date, eps in eps.items():
    if np.isnan(eps):
        pe_ratios.append((date, 'Unknown'))
        continue
    year = pd.to_datetime(date).year
    if year in year_last_prices.index:
        pe_ratio = year_last_prices[year] / eps
        pe_ratios.append((date, round(pe_ratio, 2)))


# year_last_prices / earnings

# pe_ratios = pd.DataFrame({
#     'Price': year_end_prices,
#     'Earnings': earnings['Earnings'],
#     'P/E Ratio': year_end_prices / earnings['Earnings']
# })

# print(pe_ratios)

# stock = yf.Ticker("AMZN")
# shares_outstanding = stock.get_shares_full(start=list(net_income.keys())[-1], end=list(net_income.keys())[0])
# shares_outstanding.index = pd.to_datetime(shares_outstanding.index)
# shares_outstanding.groupby(shares_outstanding.index).last()
# shares_outstanding.index = shares_outstanding.index.strftime('%Y-%m-%d')

# eps = income_statement['Diluted EPS']

In [74]:
pe_ratios

[('2024-09-30', 41.14),
 ('2023-09-30', 31.22),
 ('2022-09-30', 21.02),
 ('2021-09-30', 31.11),
 ('2020-09-30', 'Unknown')]