## Purpose of notebook

1. To explore how yfinance can be used to get data for fundamental analysis
2. To explore how yfinance data can be used for model training (the time series forecasting models)

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [52]:
# Fetch the full available history for the S&P 500 ticker and save it as a CSV
data = yf.Ticker("^SPX")
sp500_history = data.history(period='max')
sp500_history.to_csv('../data/sp500.csv')

In [53]:
# load list of companies in S&P 500
def load_data(url):
    """Parse the HTML tables found at ``url`` with pandas.

    Thin wrapper around ``pd.read_html`` that passes ``header=0`` so the
    first row of each table is used for its column headers.

    Args:
        url: Location of the page to parse, passed straight to
            ``pd.read_html``.

    Returns:
        The result of ``pd.read_html`` (a list of DataFrames, one per
        table found); callers pick the table they need by index.
    """
    return pd.read_html(url, header=0)

url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
# Keep only the first table parsed from the page
tables = load_data(url)
df = tables[0]

In [67]:
# Save the companies table to CSV as well, so it can be reused for clustering analysis
# (using the industry and sub-industry data)
df.to_csv('../data/sp500_companies_sector.csv')

In [54]:
# Symbols from the companies table with full stops replaced by dashes
# (presumably the form Yahoo Finance expects, e.g. "BRK.B" -> "BRK-B" -- confirm)
companies = [symbol.replace('.', '-') for symbol in df['Symbol'].tolist()]

## Get the price data of all stocks in S&P 500

In [None]:
# Build a yfinance Tickers object covering every symbol in `companies`
data = yf.Tickers(companies)

In [57]:
# Download 10 years of price history for every symbol in `companies` and save it to CSV.
# The Tickers object is built here from `companies` instead of being read from the shared
# `data` variable: other cells rebind `data` to single-ticker objects ("^SPX", "AAPL"),
# which also expose .history(period=...), so running this cell after one of them would
# silently export the wrong history to this file.
all_data = yf.Tickers(companies).history(period='10y')
all_data.to_csv('../data/sp500_all_companies.csv')

[*********************100%***********************]  503 of 503 completed


## Get fundamental data for a specific company

In [3]:
# Ticker object for AAPL; the cells in this section read their data from `data`
data = yf.Ticker("AAPL")

In [9]:
# Balance sheet for the ticker currently held in `data`
# Idea: add some way to extract selected line items from it
data.balance_sheet

Unnamed: 0,2024-09-30,2023-09-30,2022-09-30,2021-09-30,2020-09-30
Treasury Shares Number,,0.0,,,
Ordinary Shares Number,15116786000.0,15550061000.0,15943425000.0,16426786000.0,
Share Issued,15116786000.0,15550061000.0,15943425000.0,16426786000.0,
Net Debt,76686000000.0,81123000000.0,96423000000.0,89779000000.0,
Total Debt,106629000000.0,111088000000.0,132480000000.0,136522000000.0,
...,...,...,...,...,...
Cash Cash Equivalents And Short Term Investments,65171000000.0,61555000000.0,48304000000.0,62639000000.0,
Other Short Term Investments,35228000000.0,31590000000.0,24658000000.0,27699000000.0,
Cash And Cash Equivalents,29943000000.0,29965000000.0,23646000000.0,34940000000.0,
Cash Equivalents,2744000000.0,1606000000.0,5100000000.0,17635000000.0,


In [70]:
# Income statement for the ticker currently held in `data`
data.income_stmt

Unnamed: 0,2024-09-30,2023-09-30,2022-09-30,2021-09-30,2020-09-30
Tax Effect Of Unusual Items,0.0,0.0,0.0,0.0,
Tax Rate For Calcs,0.241,0.147,0.162,0.133,
Normalized EBITDA,134661000000.0,125820000000.0,130541000000.0,123136000000.0,
Net Income From Continuing Operation Net Minority Interest,93736000000.0,96995000000.0,99803000000.0,94680000000.0,
Reconciled Depreciation,11445000000.0,11519000000.0,11104000000.0,11284000000.0,
Reconciled Cost Of Revenue,210352000000.0,214137000000.0,223546000000.0,212981000000.0,
EBITDA,134661000000.0,125820000000.0,130541000000.0,123136000000.0,
EBIT,123216000000.0,114301000000.0,119437000000.0,111852000000.0,
Net Interest Income,,-183000000.0,-106000000.0,198000000.0,890000000.0
Interest Expense,,3933000000.0,2931000000.0,2645000000.0,2873000000.0


In [69]:
# Cash flow statement for the ticker currently held in `data`
data.cash_flow

Unnamed: 0,2024-09-30,2023-09-30,2022-09-30,2021-09-30,2020-09-30
Free Cash Flow,108807000000.0,99584000000.0,111443000000.0,92953000000.0,
Repurchase Of Capital Stock,-94949000000.0,-77550000000.0,-89402000000.0,-85971000000.0,
Repayment Of Debt,-9958000000.0,-11151000000.0,-9543000000.0,-8750000000.0,
Issuance Of Debt,0.0,5228000000.0,5465000000.0,20393000000.0,
Issuance Of Capital Stock,,,,1105000000.0,880000000.0
Capital Expenditure,-9447000000.0,-10959000000.0,-10708000000.0,-11085000000.0,
Interest Paid Supplemental Data,,3803000000.0,2865000000.0,2687000000.0,3002000000.0
Income Tax Paid Supplemental Data,26102000000.0,18679000000.0,19573000000.0,25385000000.0,
End Cash Position,29943000000.0,30737000000.0,24977000000.0,35929000000.0,
Beginning Cash Position,30737000000.0,24977000000.0,35929000000.0,39789000000.0,


In [82]:
# Idea: wrap this in a function that fetches news about the company,
# so the LLM can link the articles in its response
news = data.news

# Walk into the second news item one level at a time to reach its click-through link
second_item_content = news[1]['content']
second_item_content['clickThroughUrl']['url']

'https://finance.yahoo.com/news/tech-support-apples-macbook-air-is-100-cheaper-and-still-among-the-best-laptops-you-can-buy-143011864.html'

In [None]:
# TODO: figure out what to look for when investing, so we know which data to extract and which functions to create for the LLM
# TODO: write a function that links the latest news in the LLM response for the user to read