In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
import statsmodels.api as sm

# Problem 1
# Group 3: Matt Benbenek, Ben Teske, Sam Barbel
# Step 1: Fetch Data
# Sample window and the symbols used for Problem 1.
start_date, end_date = "2022-01-01", "2024-12-31"
risk_free_ticker = "^IRX"      # 13-week Treasury yield, used as a proxy for the risk-free rate
market_index_ticker = "^GSPC"  # S&P 500 index
equity_tickers = ["SWK"]       # Replace with desired stock tickers

# Pull the "Close" data for each symbol over the same window.
# The downloads run in the same order as the names on the left:
# risk-free proxy, market index, then the equity list.
risk_free_data, market_data, stock_data = (
    yf.download(symbol, start=start_date, end=end_date)["Close"]
    for symbol in (risk_free_ticker, market_index_ticker, equity_tickers)
)

# check the columns
stock_data.columns
import plotly.graph_objects as go

# Assuming stock_data['SWK'] contains the data for Stanley Black & Decker's stock
# Build the price trace first, then attach it to an empty figure.
swk_price_trace = go.Scatter(
    x=stock_data.index,  # Assuming the index contains dates
    y=stock_data['SWK'],
    mode='lines',
    name='SWK Stock Prices',
)

fig = go.Figure()
fig.add_trace(swk_price_trace)

# Layout options are collected in one place so they are easy to scan and tweak.
layout_options = dict(
    title='SWK Stock Price Over Time',
    xaxis_title='Date',
    yaxis_title='Price',
    template='plotly_white',
    showlegend=True,
)
fig.update_layout(**layout_options)

# Show the plot
fig.show()
# Step 2: Prepare Data
# Daily simple returns; pct_change leaves a NaN in the first row, which is dropped.
market_returns = market_data.pct_change().dropna()
stock_returns = stock_data.pct_change().dropna()

# Convert the risk-free quote from a percentage to a daily rate
# (/100 for percent -> decimal, /252 to scale to a per-day figure),
# then align it to the market-return dates, forward-filling gaps.
daily_rf = risk_free_data / 100 / 252
daily_rf = daily_rf.reindex(market_returns.index, method="ffill")

# Wrap as a DataFrame and drop any dates that still have no rate.
risk_free_rate = pd.DataFrame(daily_rf).dropna()

# Join everything on the date index in two steps:
# market + risk-free first, then the stock returns on the left.
market_and_rf = pd.merge(market_returns, risk_free_rate,
                         left_index=True, right_index=True)
merged_df = pd.merge(stock_returns, market_and_rf,
                     left_index=True, right_index=True)

# Cumulative growth of 1 unit for each original return column.
# The column list is snapshotted first so the newly added columns are not revisited.
base_columns = list(merged_df.columns)
for base_col in base_columns:
    merged_df[f"{base_col}_cumu_return"] = merged_df[base_col].add(1).cumprod()
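The cell above computes the daily returns and cumulative returns for SWK, the market index, and the risk-free rate; the daily market excess return is calculated below.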
# Calculate market excess return: index return minus the daily risk-free rate
market_leg, risk_free_leg = merged_df['^GSPC'], merged_df['^IRX']
merged_df['market_excess_return'] = market_leg.sub(risk_free_leg)
# Keep the date index available as an ordinary column as well.
merged_df['dt'] = merged_df.index

In [None]:
# CAPM regression: stock excess return ~ alpha + beta * market excess return
stock = 'SWK'

# Dependent variable: the stock's daily return in excess of the daily risk-free rate
y = merged_df[stock] - merged_df['^IRX']
# Regressor: the market's daily excess return
x = merged_df['market_excess_return']

# Fit once, with an intercept. add_constant prepends a column named 'const',
# whose coefficient is the CAPM alpha. (A no-intercept fit used to run first
# and was immediately overwritten, so it has been removed.)
model = sm.OLS(y, sm.add_constant(x)).fit()
print(f"CAPM model {stock}: beta is")
print(model.params)
print(model.summary())
# Look the intercept up by label: model.params is a label-indexed Series, and
# integer-position access via params[0] is deprecated in pandas.
print(f"Alpha (intercept): {model.params['const']}")


SWK's beta is greater than 1, which means the stock is more sensitive to market movements (i.e., more volatile) than the market as a whole.
The R-squared value is 0.299, and the alpha is -0.0011.


In [None]:
# Evaluate the fitted line on an evenly spaced grid spanning the observed x range.
x_pred = np.linspace(x.min(), x.max(), 100)
X_pred = sm.add_constant(x_pred)  # prepend the intercept column expected by the model
y_pred = model.predict(X_pred)

# Two layers: the raw observations and the fitted regression line.
observed_points = go.Scatter(x=x, y=y, mode='markers', name='Data')
fitted_line = go.Scatter(
    x=x_pred,
    y=y_pred,
    mode='lines',
    name='Regression Line',
    line=dict(color='red'),
)

fig = go.Figure()
fig.add_trace(observed_points)
fig.add_trace(fitted_line)

# Customize the layout
fig.update_layout(
    title='SWK Regression Plot',
    xaxis_title='Market Excess Return',
    yaxis_title='SWK Excess Return',
    showlegend=True,
)

fig.show()

Problem 2

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
# Step 1: Fetch Data
start_date = "2022-01-01"
end_date = "2024-12-31"
risk_free_ticker = "^IRX"      # 13-week Treasury yield as a proxy for the risk-free rate
market_index_ticker = "^GSPC"  # S&P 500 index
equity_tickers = ["SWK"]       # Replace with desired stock tickers

# Every download shares the same date window, so it is passed as keyword arguments.
date_window = dict(start=start_date, end=end_date)

# Fetch the "Close" data for the risk-free proxy, the market index and the equity list.
risk_free_data = yf.download(risk_free_ticker, **date_window)["Close"]
market_data = yf.download(market_index_ticker, **date_window)["Close"]
stock_data = yf.download(equity_tickers, **date_window)["Close"]

# Step 2: Prepare Data
# Daily simple returns; the leading NaN produced by pct_change is dropped.
market_returns = market_data.pct_change().dropna()
stock_returns = stock_data.pct_change().dropna()

In [None]:
# Import FF Factors
FF_df = pd.read_csv('fama_french.csv')
# Dates in the file are parsed with the compact YYYYMMDD format.
FF_df['Date'] = pd.to_datetime(FF_df['Date'], format='%Y%m%d')

# Rescale the three factors and the risk-free column by 1/100
# (presumably percent -> decimal, to match the pct_change returns).
for factor_col in ('Mkt-RF', 'SMB', 'HML', 'RF'):
    FF_df[factor_col] = FF_df[factor_col] / 100

# Merge the FF Factors to Stocks, keeping only dates present in both tables.
# NOTE: this overwrites stock_returns, so re-running this cell without first
# re-running the cell that builds stock_returns merges the factors a second time.
stock_returns = pd.merge(stock_returns, FF_df, on='Date', how='inner')

# print minimal and maximum dates
merged_dates = stock_returns['Date']
print(merged_dates.min(), merged_dates.max())

In [None]:
# Preview the first rows of the factor table to sanity-check the parsed dates and rescaled columns.
FF_df.head()

In [None]:
# Fama-French 3-factor model:
# stock excess return ~ alpha + b1*(Mkt-RF) + b2*SMB + b3*HML
stock = 'SWK'

# Dependent variable: the stock's daily return in excess of the Fama-French risk-free rate
y = stock_returns[stock] - stock_returns['RF']
# Regressors: the three factor columns
x = stock_returns[['Mkt-RF', 'SMB', 'HML']]

# Fit once, with an intercept (alpha). A no-intercept fit used to run first
# and was immediately overwritten, so it has been removed.
model = sm.OLS(y, sm.add_constant(x)).fit()
# The banner previously said "CAPM", which mislabelled this three-factor regression.
print(f"Fama-French 3-factor model {stock}:")
print(model.summary())

The alpha is -0.0010. The beta for the market factor is 1.0607, the coefficient for SMB is 0.9110, and the coefficient for HML is 0.4233. The R-squared value for the 3-factor model is 0.408, which is higher than the R-squared value of 0.299 for CAPM. All three coefficients are statistically significant because their p-values are very small. The positive SMB coefficient of 0.9110 means that SWK's returns behave like those of a "small" company relative to the rest of the market. The positive HML coefficient of 0.4233 means that SWK behaves more like a value stock than a growth stock.

In [None]:
#Problem 3
start_date, end_date = "2022-01-01", "2024-12-31"
risk_free_ticker = "^IRX"      # 13-week Treasury yield as a proxy for the risk-free rate
market_index_ticker = "^GSPC"  # S&P 500 index
# The ten equities analysed in this problem.
equity_tickers = [
    "SWK",
    "AAPL",
    "JPM",
    "XOM",
    "JNJ",
    "KO",
    "TSLA",
    "WMT",
    "BA",
    "MCD",
]

# Step 2: Prepare Data
# Only the equity prices are downloaded here; market_data is reused from an earlier cell.
date_window = dict(start=start_date, end=end_date)
stock_data = yf.download(equity_tickers, **date_window)["Close"]

# Calculate daily returns
market_returns = market_data.pct_change().dropna()
stock_returns = stock_data.pct_change().dropna()



In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the daily returns per ticker.
# Computed once and kept in `df` for the cells below; previously describe() was
# called twice and the first result was discarded without being shown.
df = stock_returns.describe()
# Last expression of the cell, so the table is rendered.
df

In [None]:
# Flip the summary so that each ticker is a row and each statistic is a column.
df_t = df.transpose()
print(df_t)


In [None]:
# Label each row with its own ticker, taken from the index.
# df_t is the transposed describe() table, so its index holds the ticker symbols in
# the order the downloaded columns arrived. That order is not guaranteed to match
# the order in which the tickers were requested, so a hard-coded list written in
# request order can attach the wrong company name to a row.
df_t['name'] = df_t.index
df_t.head()

In [None]:
# Skewness of each ticker's daily returns (one value per column of stock_returns).
# pandas' DataFrame.skew reports the unbiased sample skewness.
skewnness = stock_returns.skew()
print(skewnness)

In [None]:
# Kurtosis of each ticker's daily returns (one value per column of stock_returns).
# pandas' DataFrame.kurtosis uses Fisher's definition, i.e. excess kurtosis:
# a normal distribution scores 0.
kurtosis = stock_returns.kurtosis()
print(kurtosis)

Above are the summary statistics for the 10 stocks.

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Features used for clustering: each ticker's mean daily return and its standard deviation.
columns_for_clustering = ['mean', 'std']
# Take an explicit copy: later cells add columns to X, and assigning into a bare
# column slice of df_t triggers pandas' SettingWithCopyWarning.
X = df_t[columns_for_clustering].copy()

In [None]:
optimal_clusters = 3

# K-Means Clustering
# Fit on the explicit feature columns only. X picks up extra columns in this and
# later cells (Cluster, PCA1, PCA2, name), so fitting on "whatever X currently
# holds" would change the result, or fail on the string column, when this cell is re-run.
# NOTE(review): the features are not rescaled, so the column with the larger
# spread dominates the distance metric. Confirm that this is intended.
kmeans = KMeans(n_clusters=optimal_clusters, random_state=15).fit(X[columns_for_clustering])

# assign() returns a new frame instead of writing into a slice of df_t,
# which avoids SettingWithCopyWarning.
X = X.assign(Cluster=kmeans.labels_)
df_t['Cluster'] = kmeans.labels_

# Per-cluster averages of the two features, plus the number of companies in each cluster.
cluster_analysis = (
    df_t.groupby('Cluster')
    .agg(
        mean_mean=('mean', 'mean'),
        mean_std=('std', 'mean'),
        num_comp=('name', 'count'),
    )
    .reset_index()
)

print("Cluster Analysis (Means and Record Counts):\n", cluster_analysis)

In [None]:
from sklearn.decomposition import PCA
import plotly.express as px

# Perform PCA to reduce to 2D
# Fit on the clustering features only. X already carries the Cluster label column
# at this point, and feeding that integer label into PCA lets it dominate the projection.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X[columns_for_clustering])

# Add the PCA results, cluster labels and company names to the dataframe.
# assign() builds a new frame, so nothing is written into a slice of df_t;
# df_t['name'] is aligned to X by index.
X = X.assign(
    PCA1=X_pca[:, 0],
    PCA2=X_pca[:, 1],
    Cluster=kmeans.labels_,
    name=df_t['name'],
)

# Create a Plotly scatter plot with company names as labels.
# The axes are principal components (linear combinations of mean and std), not a
# normalized mean or standard deviation, so they are labelled as such.
fig = px.scatter(X, x='PCA1', y='PCA2', color='Cluster',
                 hover_data=['name'],  # Show company names on hover
                 title="PCA Projection of Clusters (Colored by Cluster, Labeled by Company Name)",
                 labels={'PCA1': 'Principal Component 1', 'PCA2': 'Principal Component 2'})

# Show the plot
fig.show()

Cluster 0 had the most companies, with 7 out of 10. Cluster 1 had only one company, and Cluster 2 had 2 companies. Both Apple and Tesla are high-value tech stocks, which might explain why they are clustered together. Walmart is alone in Cluster 1; it is in retail and might be seasonal, so it may fluctuate more, resulting in a higher standard deviation. Many of the companies in Cluster 0 are value stocks rather than growth stocks, so they are more stable. They are clustered at negative values along the first PCA axis (PCA1), which might mean they have all been underperforming over the last 3 years.