In [1]:
import sys
sys.path.append('../scripts')  # Appending the path to access the scripts folder
from EDA import * 

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# # Set up visualization style
# plt.style.use('seaborn-darkgrid')
# sns.set_palette('muted')

In [None]:




# Analysis universe and sample window shared by every step below.
# The insights summary at the end of this cell names the three symbols:
# Tesla, the Vanguard Total Bond Market ETF and an S&P 500 ETF.
tickers = ['TSLA', 'BND', 'SPY']
start_date, end_date = '2015-01-01', '2024-10-31'

def fetch_data(ticker, start=None, end=None):
    """Download the price history for one ticker via ``yf.download``.

    Parameters
    ----------
    ticker : str
        Symbol to download (e.g. ``'TSLA'``).
    start, end : str or None, optional
        Date bounds forwarded to ``yf.download``. When omitted, the
        notebook-level ``start_date`` / ``end_date`` are used, so the original
        single-argument call ``fetch_data(ticker)`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with flat (single-level) column names and an
        extra ``'Ticker'`` column holding ``ticker`` on every row.

    Raises
    ------
    ValueError
        If the download returns no rows, so a bad symbol or a failed request
        is reported here instead of surfacing later as an obscure plotting or
        decomposition error.
    """
    data = yf.download(
        ticker,
        start=start_date if start is None else start,
        end=end_date if end is None else end,
        # Keep the separate 'Adj Close' column that the rest of the notebook
        # reads; recent yfinance releases adjust prices (and drop that column)
        # unless this is requested explicitly.
        auto_adjust=False,
    )

    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol. Flatten them so frames for different tickers share
    # the same column names and stack cleanly with pd.concat.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    if data.empty:
        raise ValueError(f"No price data returned for ticker {ticker!r}")

    data['Ticker'] = ticker  # Add ticker column
    return data

# Fetch and combine data for each asset into one long frame
# (one row per date per ticker, with the date moved from the index to a column).
data = pd.concat([fetch_data(ticker) for ticker in tickers])
data.reset_index(inplace=True)

# Ensure all columns have appropriate data types
data['Date'] = pd.to_datetime(data['Date'])

# Data Cleaning
# Forward-fill missing values *within each ticker*. Filling the stacked frame
# as a whole would let the last row of one ticker leak into leading gaps of
# the next one. `fillna(method='ffill')` is also deprecated in recent pandas.
# Assumes rows within each ticker are in chronological order as downloaded
# -- TODO confirm if the data source changes.
fill_columns = [col for col in data.columns if col not in ('Date', 'Ticker')]
data[fill_columns] = data.groupby('Ticker')[fill_columns].ffill()

# Check basic statistics to understand data distribution
print("Basic Statistics:\n", data.describe())

# Exploratory Data Analysis (EDA)
# Closing price history: one line per ticker on a shared set of axes.
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in tickers:
    ticker_rows = data.loc[data['Ticker'] == ticker]
    ax.plot(ticker_rows['Date'], ticker_rows['Close'], label=ticker)
ax.set_title("Closing Prices Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.legend()
plt.show()

# Day-over-day percentage change of the adjusted close, computed per ticker
# so the first row of each ticker has no return rather than one measured
# against another asset's price.
adj_close_by_ticker = data.groupby('Ticker')['Adj Close']
data['Daily Return'] = adj_close_by_ticker.pct_change()

# Daily returns over time for each asset, as a visual read on volatility.
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in tickers:
    ticker_rows = data.loc[data['Ticker'] == ticker]
    ax.plot(ticker_rows['Date'], ticker_rows['Daily Return'], label=f'{ticker} Daily Return')
ax.set_title("Daily Percentage Change (Volatility)")
ax.set_xlabel("Date")
ax.set_ylabel("Daily Return")
ax.legend()
plt.show()

# 30-row rolling mean and standard deviation of the adjusted close,
# computed independently for each ticker.
adj_close_by_ticker = data.groupby('Ticker')['Adj Close']
data['Rolling Mean'] = adj_close_by_ticker.transform(
    lambda prices: prices.rolling(window=30).mean()
)
data['Rolling Std'] = adj_close_by_ticker.transform(
    lambda prices: prices.rolling(window=30).std()
)

# Both rolling series for every ticker on one chart.
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in tickers:
    ticker_rows = data.loc[data['Ticker'] == ticker]
    dates = ticker_rows['Date']
    ax.plot(dates, ticker_rows['Rolling Mean'], label=f'{ticker} 30-Day Rolling Mean')
    ax.plot(dates, ticker_rows['Rolling Std'], label=f'{ticker} 30-Day Rolling Std')
ax.set_title("30-Day Rolling Mean and Standard Deviation")
ax.set_xlabel("Date")
ax.set_ylabel("Price / Std Dev")
ax.legend()
plt.show()

# Seasonality and Trend Analysis using Decomposition
# Each ticker's adjusted close is decomposed separately with a multiplicative
# model and a 252-row period (252 trading days).
for ticker in tickers:
    ticker_rows = data.loc[data['Ticker'] == ticker]
    adj_close = ticker_rows.set_index('Date')['Adj Close']
    decomposition = seasonal_decompose(adj_close, model='multiplicative', period=252)
    decomposition.plot()
    plt.suptitle(f"Seasonal Decomposition of {ticker} Adj Close Price", fontsize=16)
    plt.show()

# Outlier Detection
# Flag days whose absolute daily return exceeds three times that ticker's
# own standard deviation of daily returns.
for ticker in tickers:
    ticker_rows = data[data['Ticker'] == ticker]
    returns = ticker_rows['Daily Return']
    threshold = 3 * returns.std()
    is_outlier = returns.abs() > threshold
    outliers = ticker_rows[is_outlier]
    print(f"\nSignificant anomalies in {ticker} (Daily Returns > 3 Std Dev):")
    print(outliers[['Date', 'Daily Return']])

# Additional Risk and Return Analysis
# Historical Value at Risk (VaR) and Sharpe ratio per ticker.
#
# * VaR is the 5th percentile of daily returns, i.e. the one-day loss that was
#   exceeded on only 5% of days. That is the 95% confidence level.
# * The Sharpe ratio is annualized by sqrt(252) (252 trading days per year)
#   and uses a risk-free rate of zero, so it is mean return over volatility.
var_dict = {}
sharpe_dict = {}
for ticker in tickers:
    subset = data[data['Ticker'] == ticker]
    # The first return of every ticker is NaN; drop missing values explicitly.
    returns = subset['Daily Return'].dropna()
    var_dict[ticker] = returns.quantile(0.05)
    volatility = returns.std()
    # Guard against a zero or undefined volatility instead of emitting inf.
    if volatility > 0:
        sharpe_dict[ticker] = np.sqrt(252) * returns.mean() / volatility
    else:
        sharpe_dict[ticker] = np.nan

print("\nValue at Risk (VaR) at 95% confidence level (5th percentile of daily returns):")
for ticker, var in var_dict.items():
    print(f"{ticker}: {var}")

print("\nAnnualized Sharpe Ratio for each asset (risk-free rate assumed 0):")
for ticker, sharpe in sharpe_dict.items():
    print(f"{ticker}: {sharpe}")

# Insightful Extras
# Overlaid histograms (with KDE curves) of each ticker's daily returns, to
# compare how widely the returns of each asset are spread.
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in tickers:
    ticker_returns = data.loc[data['Ticker'] == ticker, 'Daily Return']
    sns.histplot(ticker_returns, kde=True, label=ticker, bins=50, ax=ax)
ax.set_title("Distribution of Daily Returns")
ax.set_xlabel("Daily Return")
ax.legend()
plt.show()

# Summary of Key Insights (Documented)
# These are fixed, hand-written conclusions; they are not derived from the
# values computed above.
insight_lines = (
    "- Tesla (TSLA): High volatility, with potential for high returns. High rolling standard deviation.",
    "- Vanguard Total Bond Market ETF (BND): Low volatility, adding stability to portfolios.",
    "- S&P 500 ETF (SPY): Balanced, with moderate risk and return, acting as a diversified asset.",
)
print("\nInsights Summary:")
for line in insight_lines:
    print(line)
