## Lecture 24: Multi-Layer Networks and Activation Functions

Today, we will use `sklearn.neural_network.MLPClassifier` to build a neural network with nonlinear activation functions to distinguish between cats and dogs.

In [1]:
import numpy as np
import os

import matplotlib.pyplot as plt
from matplotlib import rc

# Global matplotlib styling applied to every figure in this notebook.
plt.rcParams.update({
    'xtick.labelsize': 16,     # tick label size, x axis
    'ytick.labelsize': 16,     # tick label size, y axis
    'axes.linewidth': 1,       # line width of the axes frame
    'xtick.major.width': 3,    # major tick line width, x axis
    'ytick.major.width': 3,    # major tick line width, y axis
})
rc('text', usetex=False)            # render text without LaTeX
rc('font', family='DejaVu Sans')    # use DejaVu Sans for all plot text

In [2]:
from scipy import io
from sklearn.neural_network import MLPClassifier

In [3]:
# Colab-only step: mount Google Drive at /content/drive so the data files
# referenced below can be read from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### 0. Load the Dog/Cat Wavelet Data

In [4]:
# Course folder on the mounted Drive; both .mat files live in its data/ subfolder.
path = "/content/drive/MyDrive/ME491"

# Dog data: build the file path, load the .mat file, extract the 'dog_wave' array.
dog_w_path = os.path.join(path, "data/dogData_w.mat")
dogwdata_mat = io.loadmat(dog_w_path)
dog_w = dogwdata_mat['dog_wave']

# Cat data: same three steps, extracting the 'cat_wave' array.
cat_w_path = os.path.join(path, "data/catData_w.mat")
catwdata_mat = io.loadmat(cat_w_path)
cat_w = catwdata_mat['cat_wave']

### 1. Prepare training data and label

We are going to use the first 40 dog pictures and the first 40 cat pictures as training data (labelled `+1` for dogs and `-1` for cats), and train a neural network to classify the remaining 40 dog and 40 cat pictures.

In [70]:
# Columns 0-39 of each array form the training split, columns 40-79 the test
# split; within each split the dog columns come first, then the cat columns.
# Only the training matrix is transposed here, so `test_data` keeps one
# picture per column and has to be transposed by whoever consumes it.
train_data = np.hstack((dog_w[:, :40], cat_w[:, :40])).T
test_data = np.hstack((dog_w[:, 40:80], cat_w[:, 40:80]))
# One label per training picture: +1 for the 40 dogs, -1 for the 40 cats.
label = np.repeat([1, -1], 40)

### 2. Build a Neural Network

For simplicity, we will use `sklearn.neural_network.MLPClassifier` to build a neural network.

Documentation: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier

General sklearn neural network documentation:
https://scikit-learn.org/stable/modules/neural_networks_supervised.html

In [None]:
# Small multi-layer perceptron: two hidden layers (10 and 3 units) with ReLU
# activations, optimised with Adam; random_state pins the weight initialisation.
clf = MLPClassifier(activation = 'relu',
                    solver='adam', alpha=1e-5,
                    hidden_layer_sizes=(10,3),
                    max_iter=1000, random_state=1)
# Fit on `train_data` (one picture per row). The bare name `train` is not a
# data array in this notebook - it is only bound later, to the PyTorch
# training function - so using it here fails on a fresh kernel.
clf.fit(train_data, label)

### 3. Plot Prediction and Loss Function

In [None]:
# Predict on the held-out pictures. `test_data` stores one picture per column,
# so it is transposed to the one-picture-per-row layout used for fitting.
# (The bare name `test` is not a data array in this notebook.)
prediction = clf.predict(test_data.T)

# Ground truth uses the same ordering as the data: 40 dogs (+1), then 40 cats (-1).
truth = np.repeat(np.array([1, -1]), 40)

# Overlay predictions and ground truth; bars that do not coincide are misclassifications.
fig = plt.figure(figsize = (10, 4))
plt.bar(range(80), prediction, alpha = 0.5, label = "prediction")
plt.bar(range(80), truth, alpha = 0.5, label = "ground truth")
plt.legend(fontsize = 18)

In [None]:
# Plot the loss history stored on the fitted classifier during training.
plt.plot(clf.loss_curve_)

You can also look at the coefficients for every layer.

In [None]:
# coefs_ is the collection of fitted weight arrays; index 0 is the first of them.
coef = clf.coefs_
# presumably (n_input_features, 10), i.e. input -> first hidden layer - TODO confirm
print(coef[0].shape)

### 4. Building NN with TensorFlow

TF Tutorials: https://www.tensorflow.org/tutorials

In [28]:
import tensorflow as tf

In [38]:
# Feed-forward classifier assembled layer by layer:
# 1024 inputs -> 128 ReLU units -> dropout (rate 0.2) -> 2 output scores.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(1024,)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
# No activation on the last layer: it emits raw scores (logits), one per class.
model.add(tf.keras.layers.Dense(2))

In [39]:
# from_logits=True because the model's final Dense(2) layer has no activation,
# so its outputs are raw scores rather than probabilities. The "sparse" variant
# of the loss takes integer class labels.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Train with Adam and report accuracy alongside the loss.
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [None]:
# Integer class labels for the sparse cross-entropy loss:
# 0 for the first 40 pictures (dogs), 1 for the last 40 (cats).
tf_label = np.repeat(np.array([0, 1]), 40)
# Fit on `train_data` (one picture per row). The bare name `train` is not a
# data array in this notebook, so using it here fails on a fresh kernel.
model.fit(train_data, tf_label, epochs=5)

In [None]:
# Evaluate on the held-out pictures. `test_data` stores one picture per column,
# so it is transposed to one picture per row; the test split has the same
# dogs-then-cats ordering as the training split, so `tf_label` applies to it too.
# (The bare name `test` is not a data array in this notebook.)
model.evaluate(test_data.T, tf_label, verbose=2)
# Append a softmax so the wrapped model outputs class probabilities instead of logits.
probability_model = tf.keras.Sequential([model,
                                         tf.keras.layers.Softmax()])
predictions = probability_model.predict(test_data.T)
# Predicted class = index of the largest probability in each row.
test_label = np.argmax(predictions, axis = 1)

In [None]:
# Ground-truth classes for the test pictures: 40 zeros followed by 40 ones.
tf_truth = np.repeat(np.array([0, 1]), 40)

# Draw both bar series on the figure's axes. Every bar is raised by 0.1,
# presumably so that class-0 entries are still visible instead of having zero height.
fig = plt.figure(figsize=(10, 4))
ax = fig.gca()
ax.bar(range(80), test_label + 0.1, alpha=0.5, label="prediction")
ax.bar(range(80), tf_truth + 0.1, alpha=0.5, label="ground truth")
ax.legend(fontsize=18)

### 5. Building NN with PyTorch

Some tutorials:

https://www.datacamp.com/tutorial/pytorch-tutorial-building-a-simple-neural-network-from-scratch

https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html

In [98]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [99]:
class Data(Dataset):
  """Dataset wrapping a feature array and a label array as float32 tensors.

  Attributes:
    X: features converted to a float32 tensor.
    y: labels converted to a float32 tensor.
    len: number of samples, taken from the first dimension of X.
  """

  def __init__(self, X, y):
    """Copy the NumPy arrays X and y to float32 and wrap them as tensors."""
    features = X.astype(np.float32)
    targets = y.astype(np.float32)
    self.X, self.y = torch.from_numpy(features), torch.from_numpy(targets)
    self.len = len(self.X)

  def __getitem__(self, index):
    """Return the (features, label) pair stored at `index`."""
    sample = self.X[index]
    target = self.y[index]
    return sample, target

  def __len__(self):
    """Return the number of samples."""
    return self.len

In [105]:
# Wrap the train/test arrays in Data objects and batch them with DataLoaders.
batch_size = 64
# train_data already has one picture per row; tf_label holds the 0/1 class labels.
train_data_torch = Data(train_data, tf_label)
# NOTE(review): shuffle is not set and the labels are ordered (all 0s, then all 1s),
# so batches are not class-balanced - confirm this is intended.
train_dataloader = DataLoader(train_data_torch, batch_size=batch_size)
# test_data has one picture per column, hence the transpose; the same label
# vector is reused for the test split.
test_data_torch = Data(test_data.T, tf_label)
test_dataloader = DataLoader(test_data_torch, batch_size=batch_size)

In [None]:
# Choose the compute device: CUDA if available, otherwise MPS if available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
elif torch.backends.mps.is_available():
  device = "mps"
else:
  device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
  """Fully connected classifier: 1024 -> 1024 -> 20 -> 2 with ReLU in between."""

  def __init__(self):
    """Create the flatten step and the stack of linear/ReLU layers."""
    super().__init__()
    self.flatten = nn.Flatten()
    # Layers are listed in forward order; the last Linear has no activation,
    # so the network outputs raw scores (logits) for the two classes.
    stack = [
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Linear(1024, 20),
        nn.ReLU(),
        nn.Linear(20, 2),
    ]
    self.linear_relu_stack = nn.Sequential(*stack)

  def forward(self, x):
    """Flatten the input batch and return the logits from the layer stack."""
    return self.linear_relu_stack(self.flatten(x))

# Instantiate the network and move its parameters to the selected device.
# NOTE(review): this rebinds `model`, which earlier in the notebook referred to
# the Keras model; re-running the TensorFlow cells after this one will not work
# without rebuilding that model first.
model = NeuralNetwork().to(device)
print(model)

In [116]:
# Cross-entropy loss for the PyTorch network.
# NOTE(review): this rebinds `loss_fn`, which earlier held the Keras loss object.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters, learning rate 1e-3.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [120]:
def train(dataloader, model, loss_fn, optimizer):
  """Run one pass over `dataloader`, taking one optimizer step per batch.

  Args:
    dataloader: iterable of (inputs, targets) batches; must expose `.dataset`.
    model: network to optimise; it is switched to training mode.
    loss_fn: callable applied to (predictions, integer targets).
    optimizer: optimizer whose `step()` / `zero_grad()` are called per batch.

  Batches are moved to the notebook-level `device`. The loss is printed for
  every 100th batch, starting with the first one.
  """
  n_samples = len(dataloader.dataset)
  model.train()
  for step, batch_pair in enumerate(dataloader):
    inputs, targets = (t.to(device) for t in batch_pair)
    # Forward pass; targets are cast to integer class indices for the loss.
    loss = loss_fn(model(inputs), targets.long())

    # Backward pass, parameter update, then clear gradients for the next batch.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if step % 100:
      continue
    seen = (step + 1) * len(inputs)
    print(f"loss: {loss.item():>7f}  [{seen:>5d}/{n_samples:>5d}]")

In [141]:
def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  label = []
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      print(X.shape)
      print(pred)
      test_loss += loss_fn(pred, y.long()).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
      label.append(pred.argmax(1))
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
  return label

In [None]:
# Train for `epochs` passes over the training data, evaluating on the test
# data after every pass.
epochs = 1
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train(train_dataloader, model, loss_fn, optimizer)
  # NOTE(review): this rebinds `label`, which earlier held the +1/-1 training
  # labels for the scikit-learn classifier; after this cell it is the list of
  # per-batch predictions returned by test().
  label = test(test_dataloader, model, loss_fn)
print("Done!")

In [None]:
# Show the predictions from the last evaluation: a list with one tensor of
# predicted class indices per test batch.
print(label)