$${\color{yellow}{\text{Applied Linear Algebra: Variance maximization using PyTorch}}}$$



---

Load essential libraries

---

In [None]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
plt.style.use('dark_background')
%matplotlib inline
import sys
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler

---

Mount the Google Drive folder if running in Google Colab

---

In [None]:
## Resolve DATA_DIR: relative 'Data/' folder by default, Google Drive folder on Colab
if 'google.colab' not in sys.modules:
    # Not on Colab: path is relative to the current working directory
    DATA_DIR = 'Data/'
else:
    # On Colab: (re)mount Drive and point at the course folder inside it
    from google.colab import drive
    drive.mount('/content/drive', force_remount = True)
    DIR = '/content/drive/MyDrive/Colab Notebooks/MAHE/MSIS Coursework/OddSem2025MAHE'
    DATA_DIR = f'{DIR}/Data/'

---

Load the food texture dataset

---

In [None]:
## Read the food texture CSV into a dataframe
# Row 0 of the file supplies the column names; column 0 becomes the row index
FILE = f'{DATA_DIR}food-texture.csv'
df_food = pd.read_csv(FILE, header = 0, index_col = 0)

# Preview the first five rows
df_food.head()

---

Preprocess the dataset

---

In [None]:
## Column names grouped by variable type
continuous_cols = ['Oil', 'Density', 'Fracture', 'Hardness']
categorical_cols = ['Crispy']

# Cast every column to its intended dtype (categorical columns first, then continuous)
target_dtypes = {
    **dict.fromkeys(categorical_cols, 'category'),
    **dict.fromkeys(continuous_cols, 'float64'),
}
for col_name, col_dtype in target_dtypes.items():
    df_food[col_name] = df_food[col_name].astype(col_dtype)

## Show the resulting column dtypes
df_food.dtypes

---

Using PyTorch, calculate an optimal direction $\mathbf{v}$ (a vector with unit magnitude) such that the variance of the projected values $$\dfrac{1}{n}\sum_{i=1}^n\left(\underbrace{\mathbf{x}^{(i)}\cdot\mathbf{v}}_{\text{projection of }i\text{th sample}}-\underbrace{\pmb{\mu}\cdot\mathbf{v}}_{\text{average of projected samples = projection of average sample}}\right)^2$$ is maximized.

The direction vector obtained from this cell should match, up to an overall sign (both $\mathbf{v}$ and $-\mathbf{v}$ give the same projected variance), the first principal component printed by the next cell, where PCA is performed using scikit-learn.

---

In [None]:
# Data matrix (select only continuous columns); rows are samples, columns are features
X = torch.tensor(df_food[continuous_cols].values, dtype = torch.float64)

# Mean sample (average over the rows, i.e. one mean per feature)
mu = torch.mean(X, dim = 0)

# Initial direction vector (has to be a unit vector)
w = torch.tensor(np.ones(X.shape[1]), dtype = torch.float64, requires_grad = True)
with torch.no_grad():
  w.data = w.data / torch.linalg.norm(w.data)

# Define optimizer.
# Plain gradient descent is used on purpose: the gradient of the loss is -2*C*w
# (C = covariance matrix with 1/n scaling), so an SGD step followed by
# renormalization is w <- normalize((I + 2*lr*C) w), i.e. power iteration, which
# converges to the top eigenvector of C for any lr > 0.
# Adam rescales each coordinate's gradient by its running magnitude, so together
# with the renormalization it does not follow the gradient direction and is not
# guaranteed to reach the first principal component.
optimizer = torch.optim.SGD([w], lr = 0.01)

# Loss function
def loss_fn(w):
  """Return the negative variance of the samples projected onto direction w.

  Uses the module-level data matrix X and mean sample mu. The variance is
  negated because the optimizer minimizes, while we want to maximize it.
  """
  projected_samples = torch.matmul(X, w)  # x^(i) . w for every sample
  projected_mean = torch.dot(mu, w)       # mu . w (mean of the projections)
  loss = -torch.mean(torch.square(projected_samples - projected_mean))
  return loss

# Optimization loop
num_epochs = 100
for epoch in range(num_epochs):
  # Zero out the gradients
  optimizer.zero_grad()

  # Loss calculation
  loss = loss_fn(w)

  # Backward propagation and optimization
  loss.backward()
  optimizer.step()

  # Print the loss every 2 epochs
  if epoch%2 == 0:
    print(f'Epoch {epoch}, loss = {loss.item()}')

  # Constraint satisfaction (w should be unit vector): project back onto the unit sphere
  with torch.no_grad():
    w.data = w.data / torch.linalg.norm(w.data)

# Print the optimized direction vector
print(w)

---

PCA using sklearn module (just run the cell to get the optimized direction vector)

---

In [None]:
from sklearn.decomposition import PCA

# Create and fit PCA object on the data matrix X built in the PyTorch cell.
# Only the fitted components are needed here, so fit() is used instead of
# fit_transform(), whose projected data would simply be discarded.
pca = PCA(n_components = X.shape[1])
pca.fit(X.detach().numpy())

# Print optimal direction vector (first principal component).
# Its overall sign is arbitrary, so it may be the negative of the PyTorch result.
print(pca.components_[0])