## Purpose of notebook

1. Explore how yfinance can be used to get data for fundamental analysis.
2. Explore how it can supply data for model training (the time series forecasting models).

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [52]:
# Fetch the full available history for the ^SPX ticker, then write it out as CSV
data = yf.Ticker("^SPX")
spx_history = data.history(period='max')
spx_history.to_csv('../data/sp500.csv')

In [53]:
# load list of companies in S&P 500
def load_data(url):
    """Parse the HTML tables found at `url`.

    The first row of each table is used as its header (``header=0``).
    Returns whatever `pd.read_html` produces: a list of DataFrames,
    one per table.
    """
    return pd.read_html(url, header=0)

# Wikipedia page listing the S&P 500 constituents; only the first table
# parsed from the page is kept as `df`.
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
df = load_data(url)[0]

In [67]:
# Export the constituents table to CSV as well, so it can be reused for clustering analysis
# (using the sector / sub-industry data — presumably columns of the Wikipedia table; verify the column names).
df.to_csv('../data/sp500_companies_sector.csv')

In [54]:
# Ticker symbols from the constituents table, with every '.' swapped for '-'
# (presumably because Yahoo Finance writes share classes with a dash — TODO confirm)
companies = [symbol.replace('.', '-') for symbol in df['Symbol'].tolist()]

## Get the price data of all stocks in S&P 500

In [None]:
# Build a single multi-ticker handle covering every symbol in `companies`.
data = yf.Tickers(companies)

In [57]:
# Download the last 10 years of history for all tickers at once and save it as CSV.
all_data = data.history(period='10y')
all_data.to_csv('../data/sp500_all_companies.csv')

[*********************100%***********************]  503 of 503 completed


## Get fundamental data for a specific company

In [2]:
# Ticker handle for AAPL; the fundamental-data cells below read from this `data` object.
data = yf.Ticker("AAPL")

In [None]:
# Balance sheet, re-keyed as {line item: {'YYYY-MM-DD': value}}
bs = data.balance_sheet.transpose()
bs.index = bs.index.strftime('%Y-%m-%d')
bs = bs.to_dict()

# Income statement, same shape
income_statement = data.income_stmt.transpose()
income_statement.index = income_statement.index.strftime('%Y-%m-%d')
income_statement = income_statement.to_dict()

diluted_eps = income_statement['Diluted EPS']
book_value = bs['Stockholders Equity']
shares_outstanding = bs['Ordinary Shares Number']

# Graham number = sqrt(22.5 * diluted EPS * book value per share)
GRAHAM_MULTIPLIER = 22.5

graham_numbers = {}

for date, eps in diluted_eps.items():
    # Look the balance-sheet figures up by date instead of pairing values
    # positionally: the two statements are separate tables and are not
    # guaranteed to cover the same dates in the same order.
    equity = book_value.get(date)
    shares = shares_outstanding.get(date)
    # pd.isna also covers None (date missing from the balance sheet)
    if pd.isna(eps) or pd.isna(equity) or pd.isna(shares) or shares == 0:
        continue
    book_value_ps = equity / shares
    # The square root is only meaningful for positive EPS and book value;
    # otherwise `** 0.5` would yield a complex number or NaN.
    if eps <= 0 or book_value_ps <= 0:
        continue
    graham_numbers[date] = (GRAHAM_MULTIPLIER * eps * book_value_ps) ** 0.5

graham_numbers

{'2024-09-30': 22.701794098299054,
 '2023-09-30': 23.478050809567907,
 '2022-09-30': 20.90282937968418,
 '2021-09-30': 22.017938467373487}

In [50]:
# calculate roe (return on equity = net income / stockholders' equity, per reporting date)
bs = data.balance_sheet.transpose()
bs.index = bs.index.strftime('%Y-%m-%d')
bs = bs.to_dict()

income_statement = data.income_stmt.transpose()
income_statement.index = income_statement.index.strftime('%Y-%m-%d')
income_statement = income_statement.to_dict()

net_income = income_statement['Net Income']
shareholders = bs['Stockholders Equity']

roe = {}
for date, income in net_income.items():
    # Match equity to income by date rather than by position; the income
    # statement and balance sheet are separate tables whose dates may differ.
    equity = shareholders.get(date)
    # Skip dates with a missing figure (pd.isna also covers None) and
    # zero equity, for which the ratio is undefined.
    if pd.isna(income) or pd.isna(equity) or equity == 0:
        continue
    roe[date] = income / equity
roe

{'2024-09-30': 1.6459350307287095,
 '2023-09-30': 1.5607601454639075,
 '2022-09-30': 1.9695887275023682,
 '2021-09-30': 1.5007132667617689}

In [59]:
# calculate P/E ratio
# NOTE(review): this cell only displays the ticker's `info` dict; no P/E ratio is computed here yet.
data.info

{'address1': 'One Apple Park Way',
 'city': 'Cupertino',
 'state': 'CA',
 'zip': '95014',
 'country': 'United States',
 'phone': '(408) 996-1010',
 'website': 'https://www.apple.com',
 'industry': 'Consumer Electronics',
 'industryKey': 'consumer-electronics',
 'industryDisp': 'Consumer Electronics',
 'sector': 'Technology',
 'sectorKey': 'technology',
 'sectorDisp': 'Technology',
 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and p

In [10]:
# Convert the income statement to a dict keyed by line item (orient='index'),
# then show the values stored under 'Diluted EPS'.
d = data.income_stmt.to_dict(orient='index')
d['Diluted EPS'].values()

dict_values([6.08, 6.13, 6.11, 5.61, nan])

In [55]:
# List the row labels (line items) available on the balance sheet.
data.balance_sheet.index

Index(['Treasury Shares Number', 'Ordinary Shares Number', 'Share Issued',
       'Net Debt', 'Total Debt', 'Tangible Book Value', 'Invested Capital',
       'Working Capital', 'Net Tangible Assets', 'Capital Lease Obligations',
       'Common Stock Equity', 'Total Capitalization',
       'Total Equity Gross Minority Interest', 'Stockholders Equity',
       'Gains Losses Not Affecting Retained Earnings',
       'Other Equity Adjustments', 'Retained Earnings', 'Capital Stock',
       'Common Stock', 'Total Liabilities Net Minority Interest',
       'Total Non Current Liabilities Net Minority Interest',
       'Other Non Current Liabilities', 'Tradeand Other Payables Non Current',
       'Long Term Debt And Capital Lease Obligation',
       'Long Term Capital Lease Obligation', 'Long Term Debt',
       'Current Liabilities', 'Other Current Liabilities',
       'Current Deferred Liabilities', 'Current Deferred Revenue',
       'Current Debt And Capital Lease Obligation',
       'Current C

In [None]:
# Pull the 'Net Income' row of the income statement and the 'Stockholders Equity'
# row of the balance sheet.
# NOTE(review): this cell ends in an assignment, yet the saved output beneath it is a
# series named 'Retained Earnings' — it looks like stale output from an earlier
# version of the cell; re-run to refresh.
net_income = data.income_stmt.loc['Net Income']
equity = data.balance_sheet.loc['Stockholders Equity']

2024-09-30   -19154000000.0
2023-09-30     -214000000.0
2022-09-30    -3068000000.0
2021-09-30     5562000000.0
2020-09-30              NaN
Name: Retained Earnings, dtype: object

In [41]:
# Display the 'Stockholders Equity' series.
equity

2024-09-30    56950000000.0
2023-09-30    62146000000.0
2022-09-30    50672000000.0
2021-09-30    63090000000.0
2020-09-30              NaN
Name: Stockholders Equity, dtype: object

In [44]:
# Ratio of net income to stockholders' equity per reporting date,
# dropping dates where the result is missing.
(net_income / equity).dropna()

2024-09-30    1.645935
2023-09-30     1.56076
2022-09-30    1.969589
2021-09-30    1.500713
dtype: object

In [82]:
# could maybe write a function to get news about the company, then get the LLM to link it in its response
def get_news_url(item):
    """Return the article URL of a single news item, or None if it has none.

    `item` is one element of `Ticker.news`. The URL is read from
    item['content']['clickThroughUrl']['url']; every level is looked up
    defensively so a missing or None entry yields None instead of raising.
    """
    content = (item or {}).get('content') or {}
    # 'canonicalUrl' is tried as a fallback — assumes yfinance news items
    # carry that key when 'clickThroughUrl' is empty; TODO confirm.
    for key in ('clickThroughUrl', 'canonicalUrl'):
        link = content.get(key) or {}
        if link.get('url'):
            return link['url']
    return None


news = data.news or []

# Same item as before (index 1), but without an IndexError on a short list
get_news_url(news[1]) if len(news) > 1 else None

'https://finance.yahoo.com/news/tech-support-apples-macbook-air-is-100-cheaper-and-still-among-the-best-laptops-you-can-buy-143011864.html'

In [None]:
# TODO: decide which signals matter when investing, so we know what data to extract and which functions to create for the LLM
# TODO: write a function that links the latest news in the LLM response for the user to read