## Part 1
Q1.
Dataset: https://www.kaggle.com/datasets/jillanisofttech/tesla-stock-price

In [13]:
from sklearn.preprocessing import StandardScaler
import pymc as pm
import numpy as np
import pandas as pd

data = pd.read_csv('/content/Tasla_Stock_Updated_V2.csv')
features = data[['Open', 'High', 'Low', 'Volume']]
target = data['Close']

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(features)
y = target.values.reshape(-1, 1)  # Reshape y to match dimensions

n, p = X.shape

# Bayesian Linear Regression with the prepared dataset
with pm.Model() as MLR:
    betas = pm.MvNormal('betas', mu=np.zeros((p, 1)), cov=np.eye(p), shape=(p, 1))
    sigma = pm.TruncatedNormal('sigma', mu=1, sigma=1, lower=0)  # half normal
    y_observed = pm.Normal('y', mu=pm.math.dot(X, betas), sigma=sigma, observed=y)

    # Sampling
    idata = pm.sample(draws=1000, return_inferencedata=True)

pm.summary(idata)


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"betas[0, 0]",17.337,0.914,15.741,19.156,0.015,0.011,3581.0,1661.0,1.0
"betas[0, 1]",17.312,0.939,15.443,18.998,0.015,0.011,3878.0,1554.0,1.0
"betas[0, 2]",17.318,0.946,15.501,18.972,0.014,0.01,4280.0,1535.0,1.0
"betas[0, 3]",17.325,0.886,15.556,18.857,0.014,0.01,4204.0,1661.0,1.0
"betas[1, 0]",17.377,0.955,15.566,19.097,0.017,0.012,3242.0,1422.0,1.0
"betas[1, 1]",17.356,0.924,15.655,19.137,0.015,0.01,4013.0,1403.0,1.0
"betas[1, 2]",17.348,0.913,15.65,19.013,0.016,0.011,3456.0,1434.0,1.0
"betas[1, 3]",17.34,0.894,15.785,19.051,0.015,0.011,3501.0,1658.0,1.0
"betas[2, 0]",17.336,0.905,15.725,19.105,0.015,0.01,3814.0,1755.0,1.0
"betas[2, 1]",17.333,0.944,15.639,19.125,0.016,0.011,3652.0,1450.0,1.0


Q2.


In [14]:
with pm.Model() as MLR:
    betas = pm.MvNormal('betas', mu=np.zeros((p, 1)), cov=np.eye(p), shape=(p, 1))
    sigma = pm.HalfCauchy('sigma', beta=1)  # Using Half-Cauchy
    y_observed = pm.Normal('y', mu=pm.math.dot(X, betas), sigma=sigma, observed=y)

    idata = pm.sample(draws=1000, return_inferencedata=True)
pm.summary(idata)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"betas[0, 0]",10.582,0.908,8.832,12.265,0.014,0.01,4001.0,1339.0,1.0
"betas[0, 1]",10.575,0.912,8.81,12.287,0.013,0.01,4544.0,1789.0,1.0
"betas[0, 2]",10.592,1.003,8.66,12.4,0.014,0.01,5296.0,1089.0,1.0
"betas[0, 3]",10.58,0.949,8.728,12.288,0.015,0.011,3845.0,1358.0,1.0
"betas[1, 0]",10.586,0.962,8.786,12.357,0.016,0.011,3638.0,1354.0,1.0
"betas[1, 1]",10.618,0.937,8.882,12.404,0.013,0.009,5425.0,1574.0,1.0
"betas[1, 2]",10.625,0.946,8.829,12.355,0.015,0.011,3862.0,1551.0,1.0
"betas[1, 3]",10.578,0.948,8.77,12.304,0.015,0.011,4095.0,1545.0,1.0
"betas[2, 0]",10.629,0.979,8.779,12.4,0.015,0.011,4055.0,1515.0,1.0
"betas[2, 1]",10.598,0.98,8.977,12.648,0.016,0.012,3670.0,1529.0,1.0


## Part 2
Q1.


When substituting $\boldsymbol \Sigma = \sigma^2 \mathbf{I}$ and its inverse as $\frac{1}{\sigma^2} \mathbf{I}$:

- The Mean simplifies to:
$$\left[\frac{1}{\sigma^2}\mathbf{X}^\top\mathbf{X} + \boldsymbol \Sigma_\beta^{-1}\right]^{-1}\left(\frac{1}{\sigma^2}\mathbf{X}^\top\mathbf{y} + \boldsymbol \Sigma_\beta^{-1}\boldsymbol\beta_0\right)$$

- The Covariance simplifies to:
$$\left[\frac{1}{\sigma^2}\mathbf{X}^\top\mathbf{X} + \boldsymbol \Sigma_\beta^{-1}\right]^{-1}$$


Q2.

The expected value represents the posterior mean of $\boldsymbol \beta$, calculated under specified conditions.

The expression $(\mathbf{X}^\top \boldsymbol\Sigma^{-1} \mathbf{X} + \boldsymbol \Sigma_\beta^{-1})$ incorporates information from both the prior knowledge and the observed data, acting as the posterior distribution's precision. Meanwhile, $\mathbf{X}^\top \boldsymbol\Sigma^{-1} \mathbf{y} + \boldsymbol \Sigma_\beta^{-1} \boldsymbol\beta_0$merges the data influence with the prior mean to establish the posterior mean.

Q3.

We need to set the prior on $\boldsymbol \beta$ to be non-informative, effectively disregarding the prior's influence, such as setting the prior covariance to infinity or its precision to zero.

Q4.
Setting the prior covariance matrix of $\boldsymbol \beta$ to be infinitely large or effectively non-informative would make the Bayesian predictive mean $E[\mathbf{\hat y} = \mathbf{X}\boldsymbol \beta | \boldsymbol\Sigma, \mathbf{X},\mathbf{y}]$ equivalent to the ordinary least squares prediction $\mathbf{X}(\mathbf{X^\top X})^{-1}\mathbf{X^\top y}$. This condition implies ignoring the prior's influence, aligning the Bayesian estimate with the frequentist OLS prediction.


Q5.

$$
\text{Var}[\boldsymbol \beta |\boldsymbol\Sigma, \mathbf{X},\mathbf{y}] = \left(\mathbf{X}^{\top}\boldsymbol\Sigma^{-1} \mathbf{X}  + \boldsymbol\Sigma_\beta^{-1}\right)^{-1}
$$


## Part 3

Q1.
We use the same dataset in part 1


In [None]:

with pm.Model() as MNV_LKJ:
    # Define the Cholesky factor of the covariance matrix
    packed_L = pm.LKJCholeskyCov("packed_L", n=p, eta=2.0, sd_dist=pm.Exponential.dist(1.0, shape=p), compute_corr=False)
    # Expand the packed Cholesky factor to a full matrix
    L = pm.expand_packed_triangular(p, packed_L)

    # Define the multivariate normal distribution for the observed data
    mu = pm.Normal('mu', mu=np.zeros(p), sigma=1, shape=p)  # Ensure mu is a vector of appropriate length
    y_obs = pm.MvNormal('y_obs', mu=mu, chol=L, observed=y)

with MNV_LKJ:
    trace1 = pm.sample(1000)

pm.summary(trace1)


Q2.

In [4]:


with pm.Model() as model:
    chol, corr, stds = pm.LKJCholeskyCov("chol", n=p, eta=2, sd_dist=pm.Exponential.dist(1.0), compute_corr=True)
    mu = pm.Normal('mu', mu=0, sigma=10, shape=p)

    y_obs = pm.MvNormal('y_obs', mu=mu, chol=chol, observed=y)

    trace2 = pm.sample(1000, tune=1000, return_inferencedata=True)

pm.summary(trace2)


ValueError: Incompatible Elemwise input shapes [(1, 511), (1, 13)]

Q3.

In [None]:
with pm.Model() as model:

    mu = pm.Normal('mu', mu=np.zeros(p), sigma=np.ones(p)*5, shape=p)
    sd_dist = pm.HalfNormal.dist(sigma=2.5)
    chol, corr, stds = pm.LKJCholeskyCov("chol", n=p, eta=2, sd_dist=sd_dist, compute_corr=True)

    y_obs = pm.MvNormal('y_obs', mu=mu, chol=chol, observed=y)

    trace3 = pm.sample(1000, tune=1000, return_inferencedata=True)
pm.summary(trace3)