In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found anywhere beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

import pandas as pd
import yfinance as yf

# Fetch AAPL daily price history from Yahoo Finance for the window below
# and keep a CSV copy in the working directory.
aapl_download_options = {
    'start': '1980-12-12',
    'end': '2022-03-18',
    'progress': True,
}
apple = yf.download('AAPL', **aapl_download_options)
apple.to_csv('Apple Stock Price.csv')


import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt


# ---------------------------------------------------------------------------
# CAPM inputs
# NOTE(review): this cell originally used stock_data, index_data,
# risk_free_rate, stock_ticker and index_ticker without defining them anywhere,
# so it raised NameError on "Restart & Run All". The values below are
# assumptions made so the cell runs on a fresh kernel -- confirm each of them
# against the intended analysis.
# ---------------------------------------------------------------------------
stock_ticker = "AAPL"
index_ticker = "^GSPC"  # NOTE(review): assumed market proxy (S&P 500) -- confirm
# Same date window as the AAPL download earlier in this cell.
capm_start, capm_end = "1980-12-12", "2022-03-18"
# NOTE(review): assumed constant daily risk-free rate (2% per year spread over
# 252 trading days) -- confirm. A constant rate shifts alpha only; the slope
# (beta) of the regression does not depend on it.
risk_free_rate = 0.02 / 252


def _download_close(ticker, start, end):
    """Download daily prices for ``ticker`` and return its 'Close' column as a Series.

    ``auto_adjust=True`` is passed explicitly so the result does not depend on
    the library's default for that flag.

    Raises:
        ValueError: if the download returns no rows.
    """
    prices = yf.download(ticker, start=start, end=end, auto_adjust=True, progress=False)
    if prices.empty:
        raise ValueError(f"No price data returned for {ticker}.")
    close = prices["Close"]
    # Depending on the yfinance version the columns may be a (field, ticker)
    # MultiIndex, in which case ["Close"] is a one-column DataFrame.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close


stock_data = _download_close(stock_ticker, capm_start, capm_end)
index_data = _download_close(index_ticker, capm_start, capm_end)

# Daily Returns
stock_returns = stock_data.pct_change().dropna()
index_returns = index_data.pct_change().dropna()

# Align Data: keep only dates present in BOTH series. The default outer join
# would leave NaN rows for non-overlapping dates and break the regression.
returns_df = pd.concat(
    [stock_returns, index_returns],
    axis=1,
    keys=['Stock', 'Market'],
    join='inner',
).dropna()

# Ensure data is not empty after cleaning
if returns_df.empty:
    raise ValueError("Returns data is empty after alignment. Check data sources.")

# Excess Returns
returns_df["Stock_Excess"] = returns_df["Stock"] - risk_free_rate
returns_df["Market_Excess"] = returns_df["Market"] - risk_free_rate

# CAPM Estimation: Stock_Excess = alpha + beta * Market_Excess + error
Y = returns_df["Stock_Excess"]
X = sm.add_constant(returns_df["Market_Excess"])

# Run Regression
model = sm.OLS(Y, X).fit()

# Output Results (look coefficients up by name instead of relying on their order)
alpha = model.params["const"]
beta = model.params["Market_Excess"]
r_squared = model.rsquared
print(f"Alpha: {alpha:.6f}")
print(f"Beta: {beta:.6f}")
print(f"R-squared: {r_squared:.4f}")

# Regression Line
plt.figure(figsize=(10, 6))
plt.scatter(returns_df["Market_Excess"], returns_df["Stock_Excess"], alpha=0.5, label="Data Points")
plt.plot(returns_df["Market_Excess"], model.predict(X), color='red', label=f"CAPM Regression (Beta={beta:.2f})")
plt.axhline(0, color='black', linewidth=0.5, linestyle='--')
plt.axvline(0, color='black', linewidth=0.5, linestyle='--')
plt.xlabel("Market Excess Return")
plt.ylabel("Stock Excess Return")
plt.title(f"CAPM Regression: {stock_ticker} vs. {index_ticker}")
plt.legend()
plt.show()



In [None]:
# The beta of 1.28 tells us that AAPL is more volatile than the market.
# This means that when the market moves 1%, AAPL is expected to move about 1.28% in the same direction, on average.
# AAPL therefore has greater potential returns during market upswings while having greater risk during market downswings.

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import yfinance as yf
import matplotlib.pyplot as plt

# Retrieve Fama-French Data (daily three-factor file from Ken French's data library)
ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
ff_data = pd.read_csv(ff_url, compression='zip', skiprows=3, index_col=0)
# Drop the last row before parsing dates.
# NOTE(review): presumably a non-data footer line in the file -- confirm.
ff_data = ff_data.iloc[:-1]
ff_data.index = pd.to_datetime(ff_data.index, format='%Y%m%d')
# NOTE(review): dividing by 100 assumes the file quotes values in percent -- confirm.
ff_data = ff_data.astype(float) / 100

# Retrieve Stock Data
stock_ticker = "AAPL"
start_date, end_date = "2020-01-01", "2023-12-31"
# auto_adjust=False is passed explicitly so the 'Adj Close' column is present
# regardless of the library's default for that flag.
stock_data = yf.download(stock_ticker, start=start_date, end=end_date, auto_adjust=False)
if stock_data.empty:
    raise ValueError(f"No price data returned for {stock_ticker}.")
adj_close = stock_data['Adj Close']
# Depending on the yfinance version the columns may be a (field, ticker)
# MultiIndex, in which case ['Adj Close'] is a one-column DataFrame.
if isinstance(adj_close, pd.DataFrame):
    adj_close = adj_close.iloc[:, 0]
daily_return = adj_close.pct_change().rename('Daily Return')
stock_data['Daily Return'] = daily_return

# Handling NaN values: align returns and factors on shared dates and DROP rows
# with missing values. Filling with 0 (as done previously) would turn the
# undefined first-day return into a fabricated 0% observation in the regression.
regression_data = pd.concat(
    [daily_return, ff_data[['Mkt-RF', 'SMB', 'HML', 'RF']]],
    axis=1,
    join='inner',
).dropna()
if regression_data.empty:
    raise ValueError("Returns data is empty after alignment. Check data sources.")

# Compute Excess Returns
risk_free = regression_data['RF']
excess_return = regression_data['Daily Return'] - risk_free

# Prepare Independent Variables (same rows as excess_return by construction)
ff_factors = regression_data[['Mkt-RF', 'SMB', 'HML']]
X = sm.add_constant(ff_factors)
Y = excess_return

# Run Regression Model
model = sm.OLS(Y, X).fit()

# Display Results
print(model.summary())

# Compare R-squared with CAPM (market factor only, same sample)
capm_model = sm.OLS(Y, sm.add_constant(ff_factors[['Mkt-RF']])).fit()
print(f"CAPM R-squared: {capm_model.rsquared:.4f}")
print(f"Three-Factor Model R-squared: {model.rsquared:.4f}")

# Interpret Coefficients (looked up by name rather than by position)
print(f"Alpha: {model.params['const']:.6f}")
print(f"Beta (MKT): {model.params['Mkt-RF']:.4f}")
print(f"SMB Coefficient: {model.params['SMB']:.4f} (Size Effect)")
print(f"HML Coefficient: {model.params['HML']:.4f} (Value Effect)")


# Regression
# The fitted values depend on three factors, so joining them in date order
# against Mkt-RF alone produces a zig-zag instead of a line. Draw the market
# factor's effect instead, holding SMB and HML at their sample means.
mkt_range = np.array([ff_factors['Mkt-RF'].min(), ff_factors['Mkt-RF'].max()])
fitted_line = (
    model.params['const']
    + model.params['Mkt-RF'] * mkt_range
    + model.params['SMB'] * ff_factors['SMB'].mean()
    + model.params['HML'] * ff_factors['HML'].mean()
)
plt.figure(figsize=(10, 6))
# Y and ff_factors share one index, so the scatter inputs always have equal length.
plt.scatter(ff_factors['Mkt-RF'], Y, alpha=0.5, label="Data Points")
plt.plot(mkt_range, fitted_line, color='red', label="Regression Line")
plt.xlabel("Market Risk Premium (Mkt-RF)")
plt.ylabel("Excess Return")
plt.title(f"Fama-French Three-Factor Model Regression: {stock_ticker}")
plt.legend()
plt.show()

In [None]:
# Because it adds independent variables (SMB and HML) that carry new information, the Fama-French model explains more of the variation in excess returns and therefore has the higher R-squared.
# Because the SMB and HML coefficients are both negative, Apple appears to behave as both a large-cap stock (negative SMB) and a growth stock (negative HML).