In [3]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

In [None]:
# Step 1: Load and preprocess the data
# Reads the price history from 'AAPL.csv' (path is relative to the current
# working directory).
df = pd.read_csv('AAPL.csv')

# Parse the 'Date' column into datetimes and make it the index, so that
# time-based operations such as resampling can work on it.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Check for missing values and data types
print("Data Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()
print("\nData Description:")
print(df.describe())
# Per-column count of missing (NaN) entries.
print("\nMissing values:\n", df.isnull().sum())

# Step 2: Create a candlestick chart
# One candlestick trace: the x axis is the frame's index, and the four price
# columns supply the open/high/low/close values of each candle.
candlestick_trace = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=[candlestick_trace])

# Chart title and axis labels.
layout_options = {
    'title': 'Apple Stock Candlestick Chart',
    'xaxis_title': 'Date',
    'yaxis_title': 'Price',
}
fig.update_layout(**layout_options)
fig.show()

# Step 3: Aggregate data to last business day of each month
# The offset object is used instead of the 'BM' string alias: the alias is
# deprecated in recent pandas releases (renamed to 'BME'), while
# pd.offsets.BMonthEnd() yields the same business-month-end bins on both old
# and new versions.
# Per month: Open and Close are averaged, High is the monthly maximum, Low the
# monthly minimum, and Volume is summed.
# NOTE(review): conventional OHLC resampling uses 'first' for Open and 'last'
# for Close; the means are kept here as written - confirm this is intended.
monthly_data = df.resample(pd.offsets.BMonthEnd()).agg({
    'Open': 'mean',
    'High': 'max',
    'Low': 'min',
    'Close': 'mean',
    'Volume': 'sum',
})

print("\nMonthly Data (first 5 rows):")
# DataFrame.to_markdown() relies on the optional 'tabulate' package.
print(monthly_data.head().to_markdown())
print("\nNumber of months in the dataset:", len(monthly_data))

# Step 4: Compute daily returns
# Returns are the row-over-row relative change of the 'Open' column; the first
# row has no predecessor and is therefore NaN in both methods.

# Method 1: Using pct_change
# fill_method=None disables the implicit forward-fill of missing values (the
# legacy default, deprecated in recent pandas), so gaps stay NaN exactly as
# they do in the manual formula below.
daily_returns_pct = df['Open'].pct_change(fill_method=None)

# Method 2: Vectorized approach
# (current - previous) / previous, with the previous row's value obtained by
# shifting the series down by one position (computed once and reused).
previous_open = df['Open'].shift(1)
daily_returns_vec = (df['Open'] - previous_open) / previous_open

# Verify that both methods produce the same result
# equal_nan=True treats NaNs in matching positions (e.g. the first row) as equal.
print("\nAre the results from both methods equal?", np.allclose(daily_returns_pct, daily_returns_vec, equal_nan=True))

# Display the daily returns
print("\nDaily Returns (first few rows):")
print(daily_returns_pct.head())
print("\nDaily Returns (last few rows):")
print(daily_returns_pct.tail())
print("\nLength of daily returns:", len(daily_returns_pct))