In [290]:
# install main library YFinance
# Yahoo finance (yfinance) quickstart - https://github.com/ranaroussi/yfinance?tab=readme-ov-file#quick-start
# Alternative yahoo finance lib (yahoo_fin) - https://algotrading101.com/learn/yahoo-finance-api-guide/

%pip install yfinance

Note: you may need to restart the kernel to use updated packages.


In [252]:
# IMPORTS
import numpy as np
import pandas as pd

#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

import time
from datetime import date
from datetime import datetime


# Understanding Data-Driven Decisions data pulls

In [6]:
end = date.today()
print(f'Year = {end.year}; month= {end.month}; day={end.day}')

start = date(year=end.year-70, month=end.month, day=end.day)
print(f'Period for indexes: {start} to {end} ')

Year = 2024; month= 4; day=18
Period for indexes: 1954-04-18 to 2024-04-18 


## Question 1: [Macro] Average growth of GDP in 2023
What is the average growth (in %) of GDP in 2023?

Download the timeseries Real Gross Domestic Product (GDPC1) from FRED (https://fred.stlouisfed.org/series/GDPC1). Calculate year-over-year (YoY) growth rate (that is, divide current value to one 4 quarters ago). Find the average YoY growth in 2023 (average from 4 YoY numbers). Round to 1 digit after the decimal point: e.g. if you get 5.66% growth => you should answer 5.7

In [132]:
gdpc1 = pdr.DataReader("GDPC1", "fred", start=start, end=end)

In [61]:
# Filter rows where year is 2023 and 2022
gdp_2023 = gdpc1.loc['2023-10-01'].GDPC1
gdp_2022 = gdpc1.loc['2022-10-01'].GDPC1
yoy_growth = round( (1-(gdp_2022/gdp_2023))*100, 2)
print(f"October 2023 YoY growth: {yoy_growth}%")

October 2023 YoY growth: 3.04%


In [137]:
gdpc1['gdppot_us_yoy'] = gdpc1.GDPC1/gdpc1.GDPC1.shift(4)-1
gdpc1['gdppot_us_qoq'] = gdpc1.GDPC1/gdpc1.GDPC1.shift(1)-1

# Select dates '2022-10-01','2023-01-01','2023-04-01','2023-07-01','2023-10-01'
# values = ['2023-01-01','2023-04-01','2023-07-01','2023-10-01']
gdpc1_n = gdpc1.tail(4)

average_value = round(gdpc1_n['gdppot_us_yoy'].mean()*100,2)
print(f"2023 YoY growth (last 4 quarters): {average_value}%")

2023 YoY growth (last 4 quarters): 2.54%


## Question 2. [Macro] Inverse "Treasury Yield"
Find the min value of (dgs10-dgs2) after since year 2000 (2000-01-01) and write it down as an answer, round to 1 digit after the decimal point.

Download DGS2 and DGS10 interest rates series (https://fred.stlouisfed.org/series/DGS2, https://fred.stlouisfed.org/series/DGS10). Join them together to one dataframe on date (you might need to read about pandas.DataFrame.join()), calculate the difference dgs10-dgs2 daily.

(Additional: think about what does the "inverted yield curve" mean for the market and investors? do you see the same thing in your country/market of interest? Do you think it can be a good predictive feature for the models?)

In [109]:
dgs10 = pdr.DataReader("DGS10", "fred", start=start, end='2000-01-01')
dgs2 = pdr.DataReader("DGS2", "fred", start=start, end='2000-01-01')

In [153]:
n_df = dgs10.join(dgs2, how='inner', on="DATE")
n_df['yield_diff'] = n_df['DGS10'] - n_df['DGS2']
inv_yield = n_df[n_df.index >= '2000-01-01']

min_yield = round(inv_yield['yield_diff'].min(),1)
print(f"Minimum yield since 2000-01-01: {min_yield}%")

Minimum yield since 2000-01-01: -1.1%


## Question 3. [Index] Which Index is better recently?
Compare S&P 500 and IPC Mexico indexes by the 5 year growth and write down the largest value as an answer (%)

Download on Yahoo Finance two daily index prices for S&P 500 (^GSPC, https://finance.yahoo.com/quote/%5EGSPC/) and IPC Mexico (^MXX, https://finance.yahoo.com/quote/%5EMXX/). Compare 5Y growth for both (between 2019-04-09 and 2024-04-09). Select the higher growing index and write down the growth in % (closest integer %). E.g. if ratio end/start was 2.0925 (or growth of 109.25%), you need to write down 109 as your answer.

(Additional: think of other indexes and try to download stats and compare the growth? Do create 10Y and 20Y growth stats. What is an average yearly growth rate (CAGR) for each of the indexes you select?)

In [241]:
sp500_daily = yf.download(tickers = "^GSPC", start="2019-04-09", end="2024-04-09", interval = "1d")
mxipc_daily = yf.download(tickers = "^MXX", start="2019-04-09", end="2024-04-09", interval = "1d")

print(sp500_daily)
print(mxipc_daily)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

                   Open         High          Low        Close    Adj Close  \
Date                                                                          
2019-04-09  2886.580078  2886.879883  2873.330078  2878.199951  2878.199951   
2019-04-10  2881.370117  2889.709961  2879.129883  2888.209961  2888.209961   
2019-04-11  2891.919922  2893.419922  2881.989990  2888.320068  2888.320068   
2019-04-12  2900.860107  2910.540039  2898.370117  2907.409912  2907.409912   
2019-04-15  2908.320068  2909.600098  2896.479980  2905.580078  2905.580078   
...                 ...          ...          ...          ...          ...   
2024-04-02  5204.290039  5208.339844  5184.049805  5205.810059  5205.810059   
2024-04-03  5194.370117  5228.750000  5194.370117  5211.490234  5211.490234   
2024-04-04  5244.049805  5256.589844  5146.060059  5147.209961  5147.209961   
2024-04-05  5158.950195  5222.180176  5157.209961  5204.339844  5204.339844   
2024-04-08  5211.370117  5219.569824  5197.350098  5




In [256]:
# Filter for first and last records from selected dates
sp500_1 = sp500_daily.head(1)['Close']
sp500_2 = sp500_daily.tail(1)['Close']
sp500_ratio = sp500_2.iloc[0]/sp500_1.iloc[0]

mxipc_1 = mxipc_daily.head(1)['Close']
mxipc_2 = mxipc_daily.tail(1)['Close']
mxipc_ratio = mxipc_2.iloc[0]/mxipc_1.iloc[0]

winner = mxipc_ratio if mxipc_ratio > sp500_ratio else sp500_ratio
print(f"Largest is {(winner - 1)*100}")


Largest is 80.75151917783086


## Question 4. [Stocks OHLCV] 52-weeks range ratio (2023) for the selected stocks
Find the largest range ratio [=(max-min)/max] of Adj.Close prices in 2023

Download the 2023 daily OHLCV data on Yahoo Finance for top6 stocks on earnings (https://companiesmarketcap.com/most-profitable-companies/): 2222.SR,BRK-B, AAPL, MSFT, GOOG, JPM.

Here is the example data you should see in Pandas for "2222.SR": https://finance.yahoo.com/quote/2222.SR/history

Calculate maximum-minimim "Adj.Close" price for each stock and divide it by the maximum "Adj.Close" value. Round the result to two decimal places (e.g. 0.1575 will be 0.16)

(Additional: why this may be important for your research?)

In [255]:
# Define tickers for top 6 stocks on earnings
tickers = ["2222.SR", "BRK-B", "AAPL", "MSFT", "GOOG", "JPM"]
start_date = datetime (year=2023, month=1, day=1)  # Start of 2023
end_date = datetime(year=2023, month=12, day=31)  # End of 2023 (52 weeks)

# Download OHLCV data for all tickers
data = yf.download(tickers, start=start_date, end=end_date)["Adj Close"]

# Find max-min and calculate range ratio for each stock
largest_range_ratio = 0
largest_range_ratio_ticker = None

for ticker in data.columns:
  # Get Adj. Close prices for the ticker
  adj_close_prices = data[ticker]
  
  # Calculate max, min, and range
  max_price = adj_close_prices.max()
  min_price = adj_close_prices.min()
  range_value = max_price - min_price
  
  # Calculate range ratio
  if max_price > 0:  # Avoid division by zero
    range_ratio = range_value / max_price
  else:
    range_ratio = 0
  
  # Update largest ratio and ticker if applicable
  if range_ratio > largest_range_ratio:
    largest_range_ratio = range_ratio
    largest_range_ratio_ticker = ticker
  
# Round result to two decimal places and print
largest_range_ratio = round(largest_range_ratio, 2)
print(f"Stock with Largest Range Ratio in 2023: {largest_range_ratio_ticker} (Ratio: {largest_range_ratio})")


[                       0%%                      ]

[*********************100%%**********************]  6 of 6 completed

Stock with Largest Range Ratio in 2023: MSFT (Ratio: 0.42)





## Question 5. [Stocks] Dividend Yield
Find the largest dividend yield for the same set of stocks

Use the same list of companies (2222.SR,BRK-B, AAPL, MSFT, GOOG, JPM) and download all dividends paid in 2023. You can use get_actions() method or .dividends field in yfinance library (https://github.com/ranaroussi/yfinance?tab=readme-ov-file#quick-start)

Sum up all dividends paid in 2023 per company and divide each value by the closing price (Adj.Close) at the last trading day of the year.

Find the maximum value in % and round to 1 digit after the decimal point. (E.g., if you obtained $1.25 dividends paid and the end year stock price is $100, the dividend yield is 1.25% -- and your answer should be equal to 1.3)

In [267]:
# Define tickers for top 6 stocks on earnings
tickers = ["2222.SR", "BRK-B", "AAPL", "MSFT", "GOOG", "JPM"]
year = 2023  # Year to analyze

# Download data
data = yf.download(tickers, start=datetime(2023, 1, 1), end=datetime(year=2023, month=12, day=31))

[*********************100%%**********************]  6 of 6 completed


In [280]:
# Download AAPL stock data
tickers = ["2222.SR", "BRK-B", "AAPL", "MSFT", "GOOG", "JPM"]
tickers_data = yf.Tickers('2222.SR BRK-B AAPL MSFT GOOG JPM')

In [289]:
# Function to calculate dividend yield
def calculate_dividend_yield(ticker_data):
  try:
    # Get dividends
    dividends_data = ticker_data.get_dividends()
    filtered_dividends = dividends_data.loc[dividends_data.index >= '2023-01-01']

    total_dividend = filtered_dividends.sum()
    # Get closing price at year-end
    start_date = datetime (year=2023, month=1, day=1)  # Start of 2023
    end_date = datetime(year=2023, month=12, day=31)  # End of 2023 (52 weeks)
    closing_price = ticker_data.history(start= datetime (year=2023, month=1, day=1), end=datetime(year=2023, month=12, day=31))["Close"].iloc[-1]

    #closing_price = ticker_data["Adj Close"].iloc[-1]
    
    # Calculate and format dividend yield (handle potential zero division)
    if closing_price > 0:
      dividend_yield = (total_dividend / closing_price) * 100
      return round(dividend_yield, 1)
    else:
      return 0.0
  except (KeyError, AttributeError):
    # Handle cases where dividend data might not be available
    return 0.0

# Find maximum dividend yield
largest_dividend_yield = 0
largest_yield_ticker = None

for ticker in tickers:
  # Get ticker data
  ticker_data = tickers_data.tickers[ticker]
  
  # Calculate dividend yield for the ticker
  dividend_yield = calculate_dividend_yield(ticker_data)
  
  # Update largest yield and ticker if applicable
  if dividend_yield > largest_dividend_yield:
    largest_dividend_yield = dividend_yield
    largest_yield_ticker = ticker

# Print result
print(f"Stock with Largest Dividend Yield ({year}): {largest_yield_ticker} (Yield: {largest_dividend_yield}%)")

Stock with Largest Dividend Yield (2023): 2222.SR (Yield: 2.8%)


## Question 6. [Exploratory] Investigate new metrics
Free text answer

Download and explore a few additional metrics or time series that might be valuable for your project and write down why (briefly).

In [311]:
# Define tickers list
tickers = ["COP=X", "BCOLOMBIA.CL", "ECOPETROL.CL"]

# Define start and end dates for 2023
start_date = pd.to_datetime("2022-12-01")
end_date = pd.to_datetime("2023-12-31")

# Download daily closing prices for all tickers
data = yf.download(tickers, start=start_date, end=end_date)["Close"]


[*********************100%%**********************]  3 of 3 completed


In [321]:
# Resample data to 'ME' (monthly) frequency, selecting the 'last' closing price for each month
monthly_closing_prices = data.resample('ME').last()

mom_growth = monthly_closing_prices.copy()
mom_growth = round((mom_growth/mom_growth.shift(1)-1)*100, 2)
print(mom_growth)

year_growth = monthly_closing_prices.copy()
year_growth = round((year_growth/year_growth.shift(12)-1)*100, 2)
print(year_growth.iloc[-1]) # Display last value


Ticker      BCOLOMBIA.CL        COP=X  ECOPETROL.CL
Date                                               
2022-12-31       42500.0  4848.129883        2420.0
2023-01-31       41990.0  4600.430176        2669.0
2023-02-28       35900.0  4777.259766        2698.0
2023-03-31       34990.0  4643.250000        2450.0
2023-04-30       36000.0  4654.750000        2288.0
2023-05-31       27800.0  4391.250000        2036.0
2023-06-30       31000.0  4167.850098        2140.0
2023-07-31       33300.0  3926.290039        2206.0
2023-08-31       29100.0  4096.779785        2357.0
2023-09-30       30810.0  4071.250000        2365.0
2023-10-31       29350.0  4060.000000        2423.0
2023-11-30       31720.0  3988.850098        2520.0
2023-12-31       33200.0  3879.750000        2340.0
Ticker      BCOLOMBIA.CL  COP=X  ECOPETROL.CL
Date                                         
2022-12-31           NaN    NaN           NaN
2023-01-31         -1.20  -5.11         10.29
2023-02-28        -14.50   3.84     

## Question 7. [Exploratory] Time-driven strategy description around earnings releases

Free text answer

Explore earning dates for the whole month of April - e.g. using YahooFinance earnings calendar (https://finance.yahoo.com/calendar/earnings?from=2024-04-21&to=2024-04-27&day=2024-04-23). Compare with the previous closed earnings (e.g., recent dates with full data https://finance.yahoo.com/calendar/earnings?from=2024-04-07&to=2024-04-13&day=2024-04-08).

Describe an analytical strategy/idea (you're not required to implement it) to select a subset companies of interest based on the future events data.

Answer:
Explore the sentiment evolution on the stocks and the frequency of the news a day prior to the earnings release.
For sentiment, pick those stocks which are not overhyped and overvalued, also, avoid those who are recently being negatively projected by news and general sentiment.

As for frequency of news, it should serve as an potentiator of the sentiment for those stocks that are negatively talked about and also overhyped. Conservative news stocks should be the general pick.

The goal is to pick stocks that are doing well and flying under the radar, hopefully these companies are focused on their main activities instead of the hyping and marketing of their products and looking for sales boost. If they fit into the stocks this analysis expects to find, they should be a good mid to long term investment.