In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

In [2]:
# Step 1: Load and preprocess the data
df = pd.read_csv('AAPL.csv')

# Convert date to datetime and set as index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Check for missing values and data types
print("Data Info:")
print(df.info())
print("\nData Description:")
print(df.describe())
print("\nMissing values:\n", df.isnull().sum())

# Step 2: Create a candlestick chart
fig = go.Figure(data=[go.Candlestick(x=df.index,
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])

fig.update_layout(title='Apple Stock Candlestick Chart', xaxis_title='Date', yaxis_title='Price')
fig.show()

# Step 3: Aggregate data to last business day of each month
monthly_data = df.resample('BM').agg({
    'Open': 'mean',
    'High': 'max',
    'Low': 'min',
    'Close': 'mean',
    'Volume': 'sum'
})

print("\nMonthly Data (first 5 rows):")
print(monthly_data.head().to_markdown())
print("\nNumber of months in the dataset:", len(monthly_data))

# Step 4: Compute daily returns
# Method 1: Using pct_change
daily_returns_pct = df['Open'].pct_change()

# Method 2: Vectorized approach
daily_returns_vec = (df['Open'] - df['Open'].shift(1)) / df['Open'].shift(1)

# Verify that both methods produce the same result
print("\nAre the results from both methods equal?", np.allclose(daily_returns_pct, daily_returns_vec, equal_nan=True))

# Display the daily returns
print("\nDaily Returns (first few rows):")
print(daily_returns_pct.head())
print("\nDaily Returns (last few rows):")
print(daily_returns_pct.tail())
print("\nLength of daily returns:", len(daily_returns_pct))

Data Info:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10119 entries, 1980-12-12 to 2021-01-29
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       10118 non-null  float64
 1   High       10118 non-null  float64
 2   Low        10118 non-null  float64
 3   Close      10118 non-null  float64
 4   Adj Close  10118 non-null  float64
 5   Volume     10118 non-null  float64
dtypes: float64(6)
memory usage: 553.4 KB
None

Data Description:
               Open          High           Low         Close     Adj Close  \
count  10118.000000  10118.000000  10118.000000  10118.000000  10118.000000   
mean      10.160287     10.270756     10.048267     10.163447      9.612853   
std       20.235232     20.476322     19.989577     20.242380     19.915597   
min        0.049665      0.049665      0.049107      0.049107      0.038671   
25%        0.272321      0.279018      0.265625      0.272321      0.230952   
50% 


Monthly Data (first 5 rows):
| Date                |     Open |     High |      Low |    Close |      Volume |
|:--------------------|---------:|---------:|---------:|---------:|------------:|
| 1980-12-31 00:00:00 | 0.136075 | 0.161272 | 0.112723 | 0.135903 | 1.34485e+09 |
| 1981-01-30 00:00:00 | 0.141768 | 0.155134 | 0.126116 | 0.141316 | 6.08989e+08 |
| 1981-02-27 00:00:00 | 0.118215 | 0.128906 | 0.106027 | 0.117892 | 3.21619e+08 |
| 1981-03-31 00:00:00 | 0.111328 | 0.120536 | 0.09654  | 0.110871 | 7.00717e+08 |
| 1981-04-30 00:00:00 | 0.121811 | 0.131138 | 0.108259 | 0.121545 | 5.36928e+08 |

Number of months in the dataset: 482

Are the results from both methods equal? False

Daily Returns (first few rows):
Date
1980-12-12         NaN
1980-12-15   -0.047823
1980-12-16   -0.073063
1980-12-17    0.019703
1980-12-18    0.028992
Name: Open, dtype: float64

Daily Returns (last few rows):
Date
2021-01-25    0.049824
2021-01-26    0.003704
2021-01-27   -0.001184
2021-01-28   -0.027261
2


'BM' is deprecated and will be removed in a future version, please use 'BME' instead.


The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.

