https://medium.com/biaslyai/pytorch-introduction-to-neural-network-feedforward-neural-network-model-e7231cff47cb

In [1]:
%%capture
import torch
import copy
import matplotlib.pyplot as plt
import numpy as np
import os 
# import torchvision.models as models
# from torchvision import transforms
# from PIL import Image

In [2]:
class Feedforward(torch.nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Feedforward, self).__init__()
        self.input_size = input_size
        self.hidden_size  = hidden_size
        self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(self.hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()
    def forward(self, x):
        hidden = self.fc1(x)
        relu = self.relu(hidden)
        output = self.fc2(relu)
        output = self.sigmoid(output)
        return output

In [32]:
model = Feedforward(2, 2)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

In [4]:
# get previous last layer name
named_layers = dict(model.named_modules())
layers = list(named_layers.keys())

# too many branches, so just get the converged branch points
# '' is first layer, the input, so disregard it
layers = [x for x in layers if '.' not in x and x != '']  

In [5]:
layers

['fc1', 'relu', 'fc2', 'sigmoid']

In [39]:
# CREATE RANDOM DATA POINTS
from sklearn.datasets import make_blobs
def blob_label(y, label, loc): # assign labels
    target = np.copy(y)
    for l in loc:
        target[y == l] = label
    return target
x_train, y_train = make_blobs(n_samples=100, n_features=2, cluster_std=1.5, shuffle=True)
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(blob_label(y_train, 0, [0]))
y_train = torch.FloatTensor(blob_label(y_train, 1, [1,2,3]))
x_test, y_test = make_blobs(n_samples=10, n_features=2, cluster_std=1.5, shuffle=True)
x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(blob_label(y_test, 0, [0]))
y_test = torch.FloatTensor(blob_label(y_test, 1, [1,2,3]))

In [40]:
model.eval()
y_pred = model(x_test)
before_train = criterion(y_pred.squeeze(), y_test)
print('Test loss before training' , before_train.item())

Test loss before training 0.9458913803100586


In [43]:
model.train()
epoch = 3000

for epoch in range(epoch):
    #sets the gradients to zero before we start backpropagation. 
    #This is a necessary step as PyTorch accumulates the gradients from the backward passes from the previous epochs.
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(x_train)
    # Compute Loss
    loss = criterion(y_pred.squeeze(), y_train)
   
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

Epoch 0: train loss: 0.18348564207553864
Epoch 1: train loss: 0.1834333837032318
Epoch 2: train loss: 0.18338237702846527
Epoch 3: train loss: 0.18332979083061218
Epoch 4: train loss: 0.18327760696411133
Epoch 5: train loss: 0.18322676420211792
Epoch 6: train loss: 0.1831742525100708
Epoch 7: train loss: 0.1831221729516983
Epoch 8: train loss: 0.18307146430015564
Epoch 9: train loss: 0.1830189824104309
Epoch 10: train loss: 0.18296703696250916
Epoch 11: train loss: 0.18291644752025604
Epoch 12: train loss: 0.18286414444446564
Epoch 13: train loss: 0.18281228840351105
Epoch 14: train loss: 0.1827617883682251
Epoch 15: train loss: 0.18270964920520782
Epoch 16: train loss: 0.1826578676700592
Epoch 17: train loss: 0.182607501745224
Epoch 18: train loss: 0.1825554519891739
Epoch 19: train loss: 0.18250374495983124
Epoch 20: train loss: 0.1824534833431244
Epoch 21: train loss: 0.18240152299404144
Epoch 22: train loss: 0.18234993517398834
Epoch 23: train loss: 0.18229980766773224
Epoch 24: tr

In [44]:
model.eval()
y_pred = model(x_test)
after_train = criterion(y_pred.squeeze(), y_test) 
print('Test loss after Training' , after_train.item())

Test loss after Training 2.377638339996338




---
# Get Activations


In [45]:
x_train[0]

tensor([ 0.5815, -8.9559])

In [46]:
y_train[0]

tensor(1.)

In [47]:
# input = torch.randn(1, 1, 32, 32)
input = x_train[0]
out = model(input)
print(out)

tensor([0.9637], grad_fn=<SigmoidBackward0>)


In [48]:
def get_activations(input, layer_name):
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    for name_to_check, layer in model.named_modules():
        if name_to_check == layer_name:
            break
    layer.register_forward_hook(get_activation(layer_name))
    
    output = model(input)

    return activation.copy()  #else will return the same actvs of model

In [49]:
for layer_name in layers:
    print(get_activations(input, layer_name))

{'fc1': tensor([7.9374, 8.3800])}
{'relu': tensor([7.9374, 8.3800])}
{'fc2': tensor([3.2781])}
{'sigmoid': tensor([0.9637])}




---
Now slightly modify the input and see what happens to each layer!

Try different modification levels


In [50]:
input_2 = x_train[0] + torch.tensor([0.1,0])
input_2

tensor([ 0.6815, -8.9559])

In [51]:
for layer_name in layers:
    print(get_activations(input_2, layer_name))

{'fc1': tensor([7.9596, 8.3923])}
{'relu': tensor([7.9596, 8.3923])}
{'fc2': tensor([3.2521])}
{'sigmoid': tensor([0.9627])}




---



In [52]:
input_3 = x_train[0] + torch.tensor([1,0])
for layer_name in layers:
    print(get_activations(input_3, layer_name))

{'fc1': tensor([8.1595, 8.5033])}
{'relu': tensor([8.1595, 8.5033])}
{'fc2': tensor([3.0181])}
{'sigmoid': tensor([0.9534])}


Find a local motif. This is a local circuit?



---

Test more simple MLPs of actual datasets. 

Make dataset with actual correlation

https://www.kaggle.com/code/zelongq/simple-mlp-for-titanic-survival/execution

https://www.kaggle.com/code/pinocookie/pytorch-simple-mlp/notebook

Try to decompose every single part of a simple but effective MLP. Patience and humility- like the NN, this method is simple, but effective. Identify motifs, and play in a lab of what happens if you combine motifs- what emergence? And mathematically, WHY?

This is completely unorthodox but it may be effective.