In [None]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

In [2]:
start_date = '2024-11-22'
end_date = '2024-11-01'

In [3]:
file = 'Tickers_Example.csv'
df = pd.read_csv(file, header=None)
tickers = df[0].tolist()
valid_tickers = []
t_prices = {}

vol_start_date = '2023-10-01'
vol_end_date = '2024-09-30'
min_days = 18
min_vol = 100000
currency1 = 'CAD'
currency2 = 'USD'

#filtering by assignment parameters
for ticker in tickers:
    t = yf.Ticker(ticker)
    info = t.info
    hist = t.history(start=vol_start_date, end=vol_end_date)
    
    if hist.empty: 
        print("Skipped Delisted: ", ticker)
    
    elif info.get("financialCurrency") != currency1 and info.get("financialCurrency") != currency2:
        print("Skipped non Canadian/American: ", ticker)

    elif ticker in valid_tickers:
        print("Skipped Duplicate: ", ticker)

    else:
        monthly_dataframes = [data for _, data in hist.groupby(pd.Grouper(freq='MS'))]
        valid_months = []

    #checking if avg monthly volume over 100,000
        volume = 0
        months_counted = 0
        for month in monthly_dataframes:
            if not (len(month) < min_days):
                volume += month['Volume'].sum()
                months_counted += 1
        if (volume/months_counted) < min_vol:
            print("Skipped (Monthly volume not >= 100,000): ", ticker)
        else:
            valid_tickers.append(ticker)
            t_prices[ticker] = hist

print(valid_tickers)

$AGN: possibly delisted; no timezone found


Skipped Delisted:  AGN


$CELG: possibly delisted; no timezone found


Skipped Delisted:  CELG


$MON: possibly delisted; no timezone found


Skipped Delisted:  MON


$RTN: possibly delisted; no timezone found


Skipped Delisted:  RTN
['AAPL', 'ABBV', 'ABT', 'ACN', 'AIG', 'AMZN', 'AXP', 'BA', 'BAC', 'BB.TO', 'BIIB', 'BK', 'BLK', 'BMY', 'C', 'CAT', 'CL', 'KO', 'LLY', 'LMT', 'MO', 'MRK', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM', 'RY.TO', 'SHOP.TO', 'T.TO', 'TD.TO', 'TXN', 'UNH', 'UNP', 'UPS', 'USB']


In [15]:
# combining the two indexes
start_date = '2023-09-01'
end_date = '2024-09-01'

benchmark_sp500 = '^GSPC'
benchmark_tsx60 = 'XIU.TO'

sp500_data = yf.Ticker(benchmark_sp500)
tsx60_data = yf.Ticker(benchmark_tsx60)

sp500_hist = sp500_data.history(start=start_date, end=end_date, interval="1d")
tsx60_hist = tsx60_data.history(start=start_date, end=end_date, interval="1d")

sp500_hist['Weekly Return'] = sp500_hist['Close'].resample('W-FRI').ffill().pct_change()
tsx60_hist['Weekly Return'] = tsx60_hist['Close'].resample('W-FRI').ffill().pct_change()

combined_benchmark = pd.DataFrame({
    'SP500 Weekly Return': sp500_hist['Weekly Return'],
    'TSX60 Weekly Return': tsx60_hist['Weekly Return']
})
combined_benchmark['Combined Weekly Return'] = combined_benchmark.mean(axis=1)

market_weekly_returns = combined_benchmark['Combined Weekly Return']

def get_beta(ticker, price, market_returns):
    prices = pd.DataFrame(price['Close'])
    prices.columns = [ticker]

    weekly_prices = prices.resample('W-FRI').ffill()
    weekly_returns = weekly_prices.pct_change(fill_method=None).dropna()

    aligned_data = pd.concat([weekly_returns[ticker], market_returns], axis=1).dropna()
    aligned_data.columns = [ticker, 'Market']

    market_variance = aligned_data['Market'].var()
    beta = aligned_data.cov().iat[0, 1] / market_variance
    return beta

t_betas = []
for ticker in valid_tickers:
    temp = get_beta(ticker, t_prices.get(ticker), market_weekly_returns)
    t_betas.append([ticker, temp])

tdict_betas = dict(t_betas)

print(tdict_betas)

{'UNH': -0.14709669459116836, 'LMT': -0.10777692408064499, 'KO': -0.020211684288452544, 'PG': 0.07784304727882323, 'ABBV': 0.24815192481576787, 'CL': 0.2843301493449392, 'ABT': 0.3044774911392439, 'PEP': 0.33422523485608535, 'PFE': 0.3471572082035393, 'T.TO': 0.3849923844533066, 'PM': 0.38739151148497064, 'MRK': 0.5442050548675047, 'MO': 0.6325242974762901, 'BMY': 0.6869397522191407, 'LLY': 0.6940268089523296, 'AAPL': 0.9192449194595208, 'TD.TO': 0.9417337748902963, 'RY.TO': 0.9779627145311078, 'UNP': 0.9849670666759541, 'ACN': 0.9854820739201312, 'AIG': 1.0535135258883734, 'BB.TO': 1.099256341824482, 'BK': 1.0996485502312965, 'CAT': 1.2406181191282946, 'BIIB': 1.2957231928118889, 'UPS': 1.4292542702619124, 'AXP': 1.4359516071433776, 'PYPL': 1.4466765564639739, 'AMZN': 1.4527059466216599, 'BA': 1.5616207722536062, 'TXN': 1.5781311832169054, 'BLK': 1.5813199998532177, 'BAC': 1.7997001558042436, 'QCOM': 1.904451498999726, 'C': 1.9782363685222677, 'USB': 1.9926063581307947, 'SHOP.TO': 2.7

In [None]:
def stock_weight(weight,close):
    # weight is the decimal weight of a given stock (if stock A takes up 10% of the portfolio the weight is given as 0.1)
    # close is the stock's close price on the day we buy it
    
    stock_num=0
    fees=0
    acc_stock_weight=0

    # finds the amount of cash we can spend on a given stock
    cost=1000000*weight

    # fixed_snum will return the number of stocks we could buy given that the fee is the fixed $3.95
    fixed_snum=(cost-3.95)/close
    
    # dyn_snum will return the number of stocks we could buy given that the fee is the dynamic $0.001 per stock
    dyn_snum=cost/(0.001+close)

    # if/else conditions will ensure the max amount of stocks is bought given the different fee options (allows us to keep the stock weight as close to original as possible
    if fixed_snum>=dyn_snum:
        stock_num=fixed_snum
        fees=3.95
    else:
        stock_num=dyn_snum
        fees=stock_num*0.001

    # finds the new weight of the stock given that a portion of the original weight is dedicated to paying the trading fees
    acc_stock_weight=(stock_num*close)/cost

    # all the important info is returned as a list
    return [stock_num,acc_stock_weight,fees]

In [None]:
# example of using stock_weight — DELETE LATER
stock_weight(0.1,20)[0]

In [None]:
test=pd.DataFrame({'stock': ['s1', 's2', 's3', 's4', 's5'], 'score':['4','3','1','2','5']})
track=pd.DataFrame({'score':[80,60,40,30,90]}, index=['s1', 's2', 's3', 's4', 's5'])
track
beta=pd.DataFrame({'score':[4,2,1,3,5]}, index=['s1', 's2', 's3', 's4', 's5'])
beta
los=list(beta.index)

In [None]:
# tracking is the DataFrame created by the function that calculates tracking error
# beta is the DataFrame created by the function that calculates tracking error
# los is the list of stocks (AFTER the pairs of best correlated stocks have been removed)
    # los should be the same as the index column on BOTH tracking and beta dataframes
def normalize_scores(tracking,beta,los):
    # creating a dataframe that uses los as an index
    rankeddf=pd.DataFrame(index=los)

    # normalizing tracking and beta scores via max normalization
    tracking['score']=tracking['score']/(tracking['score'].max())
    beta['score']=beta['score']/(beta['score'].max())

    # creating a list with the scores of each stock given its score in tracking & beta
    # the score must be a decimal number between 0-1 inclusive

    # initializing a list and iterator for the loop
    score_lst=[]
    i=0
    # this loop will create a score for each stock based off of the normalized tracking and beta scores
    # these scores are equally weighted and each score will be between 0-1
    while i<len(los):
        item_score=((tracking.loc[los[i],'score'])*0.5)+((beta.loc[los[i],'score'])*0.5)
        score_lst.append(item_score)
        i+=1

    # adding the scores to the dataframe and ranking them so the best score is on top
    rankeddf['score']=score_lst
    rankeddf=rankeddf.sort_values(by='score', ascending=False)
    return rankeddf

In [None]:
ranked=normalize_scores(track,beta,los)
ranked

In [None]:
# this portfolio determines the weighting of the portfolio based on the ranked scores
# each stock will be weighted as the percentage of its score out of the sum of all the scores
    # ie) stock_weight = stock_score/sum(list_of_stocks)
def pf_weighting(rankeddf,score_lst):
    # initializing a dataframe for stock weights
    pf_weight=pd.DataFrame(index=score_lst)
    sum_scores=rankeddf['score'].sum()

    # initializing list for weights and iterator
    weight_lst=[]
    i=0
    # function will create a list of weights for the given list of stocks
    while i<len(score_lst):
        item_weight=(rankeddf.loc[score_lst[i],'score'])/sum_scores
        weight_lst.append(item_weight)
        i+=1

    # putting the list of weights into the dataframe
    pf_weight['weight']=weight_lst
    return pf_weight

In [None]:
pf_weighting(ranked,list(ranked.index))

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.