In [1]:
# Standard library
import os
from urllib.parse import quote

# Core numerical and data libraries
import numpy as np
import pandas as pd

# Database connection
from sqlalchemy import create_engine


In [2]:
# Database credentials
# Each setting is taken from the environment variable of the same name when
# it is set; the literals are local-development fallbacks only.
# NOTE: Credentials will be externalized in Week-4 -- drop the password
# fallback at that point so no secret remains in the notebook.
DB_USER = os.environ.get("DB_USER", "root")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "1433")
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_NAME = os.environ.get("DB_NAME", "financial_analytics")

# Create MySQL engine
# User and password are percent-encoded so that characters such as "@", ":"
# or "/" in an externally supplied value cannot break the connection URL.
engine = create_engine(
    f"mysql+mysqlconnector://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}/{DB_NAME}"
)


In [3]:
# SQL for the adjusted-close history, ordered by asset and then by date
query = """
SELECT asset_id, trade_date, adj_close
FROM market_prices
ORDER BY asset_id, trade_date
"""

# Run the query through the engine, then coerce trade_date to datetime
df = pd.read_sql(query, engine).assign(
    trade_date=lambda prices: pd.to_datetime(prices["trade_date"])
)

# Preview the first rows
df.head()


Unnamed: 0,asset_id,trade_date,adj_close
0,1,2019-01-02,37.538822
1,1,2019-01-03,33.799679
2,1,2019-01-04,35.242561
3,1,2019-01-07,35.164124
4,1,2019-01-08,35.834446


In [4]:
# Calculate daily log returns for Monte Carlo input
# Each adj_close is divided by the previous row's adj_close for the same
# asset_id, so the result depends on df's row order within each asset.
# groupby(...).shift(1) is the vectorised form of shifting inside a
# per-group lambda and yields the same values.
# A zero price on either side of the ratio gives +/-inf, which dropna would
# keep; those are turned into NaN so they cannot reach the mean/covariance.
df["log_return"] = np.log(
    df["adj_close"] / df.groupby("asset_id")["adj_close"].shift(1)
).replace([np.inf, -np.inf], np.nan)

# Drop rows without a usable return (each asset's first row, plus any
# non-finite value from above). The copy keeps returns_df independent of df
# if it is modified later.
returns_df = df.dropna(subset=["log_return"]).copy()

returns_df.head()


Unnamed: 0,asset_id,trade_date,adj_close,log_return
1,1,2019-01-03,33.799679,-0.104924
2,1,2019-01-04,35.242561,0.041803
3,1,2019-01-07,35.164124,-0.002228
4,1,2019-01-08,35.834446,0.018883
5,1,2019-01-09,36.442982,0.016839


In [5]:
# Reshape the long returns table into wide form:
# one row per trade_date, one column per asset_id, cells hold log_return
returns_matrix = pd.pivot(
    returns_df,
    index="trade_date",
    columns="asset_id",
    values="log_return",
)

returns_matrix.head()


asset_id,1,2,3,4,5,6,7,8,9,10,11
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-01-03,-0.104924,-0.037482,-0.014314,-0.016156,-0.015473,-0.007036,-0.016018,-0.031978,-0.005156,0.003564,-0.025068
2019-01-04,0.041803,0.04546,0.036203,0.040692,0.036206,0.020205,0.016644,0.056094,0.006227,0.002487,0.033759
2019-01-07,-0.002228,0.001274,0.000695,-0.000782,0.005186,-0.004008,-0.006435,0.052935,0.011703,0.006367,0.006986
2019-01-08,0.018883,0.007225,-0.001887,-0.001958,0.007245,0.003684,0.022961,0.001164,0.006957,0.028846,0.009649
2019-01-09,0.016839,0.014198,-0.001692,0.009752,0.005261,-0.016466,-0.007957,0.009438,-0.003261,-0.012663,0.00409


In [6]:
# Pairwise covariance between the asset return columns
cov_matrix = returns_matrix.cov()

# Column-wise average of the daily returns, one value per asset
mean_returns = returns_matrix.mean(axis=0)

# Show both results as the cell output
mean_returns, cov_matrix


(asset_id
 1     0.001065
 2     0.000873
 3     0.000741
 4     0.000511
 5     0.000551
 6     0.000365
 7     0.000413
 8     0.001715
 9     0.000818
 10    0.000025
 11    0.000569
 dtype: float64,
 asset_id        1         2         3         4         5         6   \
 asset_id                                                               
 1         0.000381  0.000244  0.000156  0.000175  0.000116  0.000092   
 2         0.000244  0.000318  0.000144  0.000156  0.000086  0.000085   
 3         0.000156  0.000144  0.000353  0.000343  0.000202  0.000076   
 4         0.000175  0.000156  0.000343  0.000431  0.000233  0.000085   
 5         0.000116  0.000086  0.000202  0.000233  0.000386  0.000055   
 6         0.000092  0.000085  0.000076  0.000085  0.000055  0.000160   
 7         0.000075  0.000064  0.000079  0.000084  0.000066  0.000084   
 8         0.000379  0.000317  0.000229  0.000253  0.000128  0.000046   
 9         0.000095  0.000089  0.000068  0.000078  0.000047  0.0000

In [7]:
# Sanity check: report how many assets have a mean return and the
# (rows, columns) shape of the covariance matrix
print("Number of assets:", len(mean_returns))
print("Covariance matrix shape:", cov_matrix.shape)


Number of assets: 11
Covariance matrix shape: (11, 11)
