This notebook will compare a neural network that uses activation functions vs one that does not use activation functions

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
INPUT_DIM = 100 # Number of features

In [3]:
# Generate synthetic data
X, y = make_classification(
    n_samples=5000,
    n_features=INPUT_DIM,
    n_informative=INPUT_DIM,
    n_redundant=0,
    random_state=7
)

In [4]:
X.shape, y.shape

((5000, 100), (5000,))

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
X_train.shape, X_test.shape

((4000, 100), (1000, 100))

In [7]:
# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.FloatTensor(y_test)

In [8]:
class NeuralNetwork(nn.Module):
    def __init__(self, use_activation):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(INPUT_DIM, 16)
        self.fc2 = nn.Linear(16, 1)
        self.use_activation = use_activation

    def forward(self, x):
        x = F.relu(self.fc1(x)) if self.use_activation else self.fc1(x)
        x = torch.sigmoid(self.fc2(x))
        return x

In [9]:
# Function to train and evaluate the model
def train_and_evaluate(model):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(100):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor.view(-1, 1))
        if epoch % 5 == 0:
          print(loss)
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set
    with torch.no_grad():
        model.eval()
        pred = model(X_test_tensor)
        predictions = (pred > 0.5).float().numpy()
        accuracy = accuracy_score(y_test_tensor, predictions)

    return accuracy

In [10]:
# Create and train the model without activation functions
model_without_activation = NeuralNetwork(use_activation=False)
accuracy_without_activation = train_and_evaluate(model_without_activation)

tensor(0.8872, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6852, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5549, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4727, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4197, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3851, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3622, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3467, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3356, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3272, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3207, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3155, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3113, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3079, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3052, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3029, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3011, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2996, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2984, grad_fn=<Bina

In [11]:
# Create and train the model with activation functions
model_with_activation = NeuralNetwork(use_activation=True)
accuracy_with_activation = train_and_evaluate(model_with_activation)

tensor(0.9008, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7666, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6616, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5810, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5182, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4676, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4257, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3906, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3609, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3354, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3133, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2936, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2760, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2600, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2455, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2321, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2200, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2087, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.1983, grad_fn=<Bina

In [12]:
print("Accuracy without Activation Functions:", accuracy_without_activation)
print("Accuracy with Activation Functions:", accuracy_with_activation)

Accuracy without Activation Functions: 0.859
Accuracy with Activation Functions: 0.901


Note there may be situations where the adding of activation function decreases performance. This could be because adding activations causes overfitting. And maybe adding dropout could be useful.