## Lecture 19: Supervised Learning and Linear Discriminants

In [1]:
import numpy as np
import os

import matplotlib.pyplot as plt
from matplotlib import rc

# Global matplotlib styling applied to every figure in this notebook.
plt.rcParams.update({
    'xtick.labelsize': 16,      # tick label size on the x axis
    'ytick.labelsize': 16,      # tick label size on the y axis
    'axes.linewidth': 1,        # line width of the axes frame
    'xtick.major.width': 3,     # width of the major ticks on the x axis
    'ytick.major.width': 3,     # width of the major ticks on the y axis
})
rc('text', usetex=False)            # render plot text without LaTeX
rc('font', family='DejaVu Sans')    # use DejaVu Sans for all plot text

In [7]:
from scipy import io
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
# The google.colab package is provided by the Colab runtime; this cell is
# expected to fail in other environments.
from google.colab import drive
drive.mount('/content/drive')

### 1. Load Dog/Cat Wavelet Data

In [4]:
# Root folder of the course material on the mounted drive.
path = "/content/drive/MyDrive/ME491"

# Build both .mat file locations relative to the root folder.
dog_path, cat_path = (
    os.path.join(path, rel_file)
    for rel_file in ("data/dogData_w.mat", "data/catData_w.mat")
)

# loadmat returns a dict keyed by MATLAB variable name; pull out the data arrays.
dogdata_mat, catdata_mat = io.loadmat(dog_path), io.loadmat(cat_path)
dog, cat = dogdata_mat['dog_wave'], catdata_mat['cat_wave']

### 2. Perform PCA on the dataset

In [5]:
# Put the two classes side by side: dog columns first, cat columns after.
DC = np.concatenate([dog, cat], axis=1)

# PCA: subtract the mean across columns from every column, then take the
# economy-size SVD of the centred matrix.
avgAnimal = DC.mean(axis=1)
X = DC - avgAnimal[:, np.newaxis]    # broadcasting replaces the explicit tile/transpose
U, S, VT = np.linalg.svd(X, full_matrices=False)

Let's now plot each image's coordinates along the 2nd and 4th principal components, PCA2 and PCA4. These are rows 1 and 3 of `VT`, because Python indexing starts at 0.

In [None]:
# Scatter every image in the PCA2-PCA4 plane (rows 1 and 3 of VT).
# The first 80 columns of VT are the dog images, the remaining columns the cats.
plt.plot(VT[1, :80], VT[3, :80], 'ro', markerfacecolor=(0,1,0.2), markeredgecolor='k', ms=12, label = "Dog")
# Format string is 'bo' (the original had a stray leading space: ' bo').
plt.plot(VT[1, 80:], VT[3, 80:], 'bo', markerfacecolor=(0.9,0,1), markeredgecolor='k', ms=12, label = "Cat")
plt.xlabel("PCA2", fontsize = 18)
plt.ylabel("PCA4", fontsize = 18)
# The label= arguments above are only displayed once a legend is drawn.
plt.legend(fontsize = 18)
plt.show()

### 3. Build training dataset for LDA
We will use the first 60 dog pictures and the first 60 cat pictures as the training dataset for LDA. The remaining 20 dog pictures and 20 cat pictures form the testing dataset.

In [13]:
# Feature matrices must be (number of points) x (number of features per point),
# so the selected rows of VT are transposed to make each image a row.
pca_rows = [1, 3]    # rows of VT holding PCA2 and PCA4

# Training set: columns 0-59 (dogs) stacked on top of columns 80-139 (cats).
xtrain = np.vstack((VT[pca_rows, :60].T, VT[pca_rows, 80:140].T))
# 60 labels of +1 (dogs) followed by 60 labels of -1 (cats), matching xtrain's row order.
label = np.repeat([1, -1], 60)
# Test set: the held-out columns 60-79 (dogs) and 140 onwards (cats).
test = np.vstack((VT[pca_rows, 60:80].T, VT[pca_rows, 140:].T))

### 4. Perform LDA

In [None]:
# Fit the linear discriminant on the training features, then classify the test images.
lda = LinearDiscriminantAnalysis()
lda.fit(xtrain, label)
test_class = lda.predict(test)

# Ground truth in the same order as the test rows: 20 times +1, then 20 times -1.
truth = np.repeat([1, -1], 20)

# Overlay predictions and ground truth; bars that do not coincide are misclassified.
fig = plt.figure(figsize=(10, 4))
sample_idx = range(40)
plt.bar(sample_idx, test_class, alpha=0.5, label="prediction")
plt.bar(sample_idx, truth, alpha=0.5, label="ground truth")
plt.legend(fontsize=18)

### 5. Different training features

Let's now try using only PCA2 as the feature to see how well LDA performs.

### 6. Cross-Validation


In [None]:
# Repeated random-split cross-validation of LDA on the PCA2/PCA4 features.
# Each trial draws a fresh 60/20 train/test split per class, fits LDA on the
# training images and records the percentage of test images classified correctly.
n_trials = 100
n_per_class = 80                    # VT columns 0-79 are dogs, columns 80-159 are cats
n_train = 60
n_test = n_per_class - n_train
pca_rows = np.array([1, 3])         # rows of VT holding PCA2 and PCA4

# Seeded generator so the figure is reproducible on Restart & Run All.
rng = np.random.default_rng(0)

# Labels do not depend on the split, so they are built once outside the loop.
label = np.repeat(np.array([1, -1]), n_train)    # +1 = dog, -1 = cat
truth = np.repeat(np.array([1, -1]), n_test)

E = np.zeros(n_trials)              # per-trial test accuracy in percent

for jj in range(n_trials):
  r1 = rng.permutation(n_per_class)
  r2 = rng.permutation(n_per_class)
  ind1 = r1[:n_train]
  # Cats start at column 80. The original offset of +60 pulled dog columns
  # 60-79 into the "cat" class and never sampled cat columns 140-159.
  ind2 = r2[:n_train] + n_per_class
  ind1t = r1[n_train:]
  ind2t = r2[n_train:] + n_per_class

  # Indexing with a (k, 1) column of sample indices against the two feature
  # rows broadcasts to a (k, 2) array: one row per image, one column per feature.
  xtrain = np.concatenate((VT[pca_rows, ind1[:, np.newaxis]], VT[pca_rows, ind2[:, np.newaxis]]))
  test = np.concatenate((VT[pca_rows, ind1t[:, np.newaxis]], VT[pca_rows, ind2t[:, np.newaxis]]))

  lda = LinearDiscriminantAnalysis()
  test_class = lda.fit(xtrain, label).predict(test)

  # Fraction of correct predictions; equals 100*(1 - sum(|pred - truth|)/80)
  # for +/-1 labels on 40 test images.
  E[jj] = 100 * np.mean(test_class == truth)

plt.bar(range(n_trials), E, color=(0.5,0.5,0.5))
plt.plot(range(n_trials), np.mean(E)*np.ones(n_trials), 'r:', linewidth=3)    # mean accuracy
plt.xlabel("Trial", fontsize = 18)
plt.ylabel("Accuracy (%)", fontsize = 18)
plt.show()

### 7. In-Class Exercise

Let's now practice everything using the Dog/Cat Dataset in image space.