# 2. Building a Layer from Scratch

A layer contains multiple neurons!
Each neuron in a layer processes the same inputs but with different weights.
Let's build a layer from scratch!


In [1]:
import torch
import numpy as np


## 1. What is a Layer?

A layer is a collection of neurons!
All neurons receive the same inputs, but each has its own weights and bias.
The output of a layer is a vector with one value per neuron.


In [2]:
# Worked example by hand: one layer with 2 neurons, 2 inputs per neuron

# Input vector shared by every neuron in the layer
inputs = torch.tensor([2.0, 3.0])

# Weight matrix of shape (neurons, inputs) = (2, 2); row i belongs to neuron i
weights = torch.tensor([
    [0.5, 0.3],  # neuron 1
    [0.2, 0.4],  # neuron 2
])

# One bias per neuron
biases = torch.tensor([0.1, 0.2])

print(
    "Layer with 2 neurons, 2 inputs:",
    f"Inputs: {inputs}",
    f"Weights shape: {weights.shape} (neurons × inputs)",
    f"Weights:\n{weights}",
    f"Biases: {biases}",
    "",
    sep="\n",
)

# Forward pass for the whole layer as a single matrix product.
# weights is (neurons, inputs); its transpose is (inputs, neurons), so
# inputs (inputs,) @ weights.T (inputs, neurons) gives one sum per neuron.
outputs = torch.matmul(inputs, weights.T) + biases

print(
    "Forward pass:",
    "Outputs = inputs @ weights.T + biases",
    f"        = {inputs} @ {weights.T} + {biases}",
    f"        = {outputs}",
    "",
    sep="\n",
)

# Pass every weighted sum through the sigmoid, element-wise
activation = torch.sigmoid(outputs)
print(f"After activation (sigmoid): {activation}")

# Repeat the computation one neuron at a time (dot product + bias)
print("\nStep by step for each neuron:")
for i, (neuron_weights, neuron_bias) in enumerate(zip(weights, biases)):
    weighted_sum = torch.dot(neuron_weights, inputs) + neuron_bias
    activated = torch.sigmoid(weighted_sum)
    print(f"Neuron {i+1}: {neuron_weights} · {inputs} + {neuron_bias} = {weighted_sum:.4f} → {activated:.4f}")


Layer with 2 neurons, 2 inputs:
Inputs: tensor([2., 3.])
Weights shape: torch.Size([2, 2]) (neurons × inputs)
Weights:
tensor([[0.5000, 0.3000],
        [0.2000, 0.4000]])
Biases: tensor([0.1000, 0.2000])

Forward pass:
Outputs = inputs @ weights.T + biases
        = tensor([2., 3.]) @ tensor([[0.5000, 0.2000],
        [0.3000, 0.4000]]) + tensor([0.1000, 0.2000])
        = tensor([2.0000, 1.8000])

After activation (sigmoid): tensor([0.8808, 0.8581])

Step by step for each neuron:
Neuron 1: tensor([0.5000, 0.3000]) · tensor([2., 3.]) + 0.10000000149011612 = 2.0000 → 0.8808
Neuron 2: tensor([0.2000, 0.4000]) · tensor([2., 3.]) + 0.20000000298023224 = 1.8000 → 0.8581


## 2. Step 1: Initialize a Layer

First, let's create a layer class that can store weights and biases for multiple neurons.
We'll build it step by step!


In [None]:
# Step 1: Create a simple layer class that just stores weights and biases
class Layer:
    """Step 1 of the layer build-up: parameter storage only.

    Attributes:
        weights: Tensor of shape (num_neurons, num_inputs); row i holds the
            weights of neuron i.
        biases: Tensor of shape (num_neurons,); one bias per neuron.
    """

    def __init__(self, num_inputs, num_neurons):
        init_scale = 0.1  # scales the standard-normal draws down
        # Weights are drawn before biases, so the RNG is consumed in that order
        self.weights = torch.randn(num_neurons, num_inputs) * init_scale
        self.biases = torch.randn(num_neurons) * init_scale

# Instantiate a 2-input, 3-neuron layer and inspect its random parameters
layer = Layer(num_inputs=2, num_neurons=3)

print(
    "Step 1: Layer Initialization",
    "Number of inputs: 2",
    "Number of neurons: 3",
    f"Weights shape: {layer.weights.shape} (neurons × inputs)",
    f"Weights:\n{layer.weights}",
    "",
    f"Biases shape: {layer.biases.shape} (one per neuron)",
    f"Biases: {layer.biases}",
    "",
    "Good! Our layer now has weights and biases for all neurons initialized.",
    sep="\n",
)


Step 1: Layer Initialization
Number of inputs: 2
Number of neurons: 3
Weights shape: torch.Size([3, 2]) (neurons × inputs)
Weights:
tensor([[-0.1767, -0.0288],
        [ 0.0175, -0.0682],
        [-0.0155,  0.0168]])

Biases shape: torch.Size([3]) (one per neuron)
Biases: tensor([ 0.0342,  0.0569, -0.1017])

Good! Our layer now has weights and biases for all neurons initialized.


## 3. Step 2: Calculate Weighted Sums for All Neurons

Now let's add a method to calculate weighted sums for all neurons using matrix multiplication!


In [4]:
# Step 2: Add method to calculate weighted sums for all neurons
class Layer:
    """Step 2 of the layer build-up: parameters plus the weighted-sum step."""

    def __init__(self, num_inputs, num_neurons):
        # weights: (num_neurons, num_inputs), one row per neuron
        # biases:  (num_neurons,), one entry per neuron
        weight_shape = (num_neurons, num_inputs)
        self.weights = torch.randn(*weight_shape) * 0.1
        self.biases = torch.randn(num_neurons) * 0.1

    def weighted_sum(self, inputs):
        """Return ``inputs @ weights.T + biases`` for every neuron at once."""
        # A single matrix product covers all neurons; the bias is added after
        projected = torch.matmul(inputs, self.weights.T)
        return projected + self.biases

# Try weighted_sum with hand-picked parameters so the result can be checked
layer = Layer(num_inputs=2, num_neurons=3)
layer.weights = torch.tensor([
    [0.5, 0.3],  # neuron 1
    [0.2, 0.4],  # neuron 2
    [0.1, 0.6],  # neuron 3
])
layer.biases = torch.tensor([0.1, 0.2, 0.3])

inputs = torch.tensor([2.0, 3.0])
result = layer.weighted_sum(inputs)

print(
    "Step 2: Weighted Sum Calculation for All Neurons",
    f"Inputs: {inputs}",
    f"Weights shape: {layer.weights.shape}",
    f"Weights:\n{layer.weights}",
    f"Biases: {layer.biases}",
    "",
    "Weighted sums = inputs @ weights.T + biases",
    f"              = {inputs} @ {layer.weights.T} + {layer.biases}",
    f"              = {result}",
    "",
    "Great! Now we can calculate weighted sums for all neurons at once using matrix multiplication!",
    sep="\n",
)


Step 2: Weighted Sum Calculation for All Neurons
Inputs: tensor([2., 3.])
Weights shape: torch.Size([3, 2])
Weights:
tensor([[0.5000, 0.3000],
        [0.2000, 0.4000],
        [0.1000, 0.6000]])
Biases: tensor([0.1000, 0.2000, 0.3000])

Weighted sums = inputs @ weights.T + biases
              = tensor([2., 3.]) @ tensor([[0.5000, 0.2000, 0.1000],
        [0.3000, 0.4000, 0.6000]]) + tensor([0.1000, 0.2000, 0.3000])
              = tensor([2.0000, 1.8000, 2.3000])

Great! Now we can calculate weighted sums for all neurons at once using matrix multiplication!


## 4. Step 3: Add Activation Function

The activation function makes neurons non-linear. Let's add sigmoid activation first.


In [5]:
# Step 3: Add sigmoid activation function
class Layer:
    """Step 3 of the layer build-up: weighted sums plus a sigmoid activation."""

    def __init__(self, num_inputs, num_neurons):
        scale = 0.1
        # Row i of the (num_neurons, num_inputs) matrix holds neuron i's weights
        self.weights = torch.randn(num_neurons, num_inputs) * scale
        # One bias per neuron
        self.biases = torch.randn(num_neurons) * scale

    def weighted_sum(self, inputs):
        """Return ``inputs @ weights.T + biases`` for all neurons."""
        pre_activation = torch.matmul(inputs, self.weights.T) + self.biases
        return pre_activation

    def sigmoid(self, x):
        """Return the element-wise sigmoid of ``x``."""
        squashed = torch.sigmoid(x)
        return squashed

# Walk through both stages with fixed parameters
layer = Layer(num_inputs=2, num_neurons=3)
layer.weights = torch.tensor([
    [0.5, 0.3],
    [0.2, 0.4],
    [0.1, 0.6],
])
layer.biases = torch.tensor([0.1, 0.2, 0.3])

inputs = torch.tensor([2.0, 3.0])

# Stage 1: weighted sums; stage 2: sigmoid applied to every neuron's sum
ws = layer.weighted_sum(inputs)
output = layer.sigmoid(ws)

print(
    "Step 3: Adding Activation Function",
    f"1. Weighted sums = {ws}",
    f"2. After sigmoid activation = {output}",
    "",
    "Perfect! Now we have a complete layer with activation.",
    sep="\n",
)


Step 3: Adding Activation Function
1. Weighted sums = tensor([2.0000, 1.8000, 2.3000])
2. After sigmoid activation = tensor([0.8808, 0.8581, 0.9089])

Perfect! Now we have a complete layer with activation.


## 5. Step 4: Complete Forward Pass

Now let's combine everything into a single `forward` method that does it all!


In [6]:
# Step 4: Complete layer with forward pass
class Layer:
    """Complete layer: random parameters, sigmoid activation, forward pass."""

    def __init__(self, num_inputs, num_neurons):
        # (num_neurons, num_inputs) weight matrix: one row of weights per neuron
        self.weights = torch.randn(num_neurons, num_inputs) * 0.1
        # (num_neurons,) bias vector: one bias per neuron
        self.biases = torch.randn(num_neurons) * 0.1

    def sigmoid(self, x):
        """Return the element-wise sigmoid of ``x``."""
        return torch.sigmoid(x)

    def forward(self, inputs):
        """Return ``sigmoid(inputs @ weights.T + biases)``."""
        # Weighted sums for all neurons, then the activation on each of them
        return self.sigmoid(torch.matmul(inputs, self.weights.T) + self.biases)

# Run the finished layer end to end with fixed parameters
layer = Layer(num_inputs=2, num_neurons=3)
layer.weights = torch.tensor([
    [0.5, 0.3],
    [0.2, 0.4],
    [0.1, 0.6],
])
layer.biases = torch.tensor([0.1, 0.2, 0.3])

inputs = torch.tensor([2.0, 3.0])
outputs = layer.forward(inputs)

print(
    "Step 4: Complete Forward Pass",
    "Number of inputs: 2",
    "Number of neurons: 3",
    f"Inputs: {inputs}",
    "",
    f"Weights shape: {layer.weights.shape} (neurons × inputs)",
    f"Weights:\n{layer.weights}",
    "",
    f"Biases: {layer.biases}",
    "",
    f"Outputs: {outputs}",
    f"Output shape: {outputs.shape} (one output per neuron)",
    "",
    "Excellent! Our layer is complete and working!",
    sep="\n",
)


Step 4: Complete Forward Pass
Number of inputs: 2
Number of neurons: 3
Inputs: tensor([2., 3.])

Weights shape: torch.Size([3, 2]) (neurons × inputs)
Weights:
tensor([[0.5000, 0.3000],
        [0.2000, 0.4000],
        [0.1000, 0.6000]])

Biases: tensor([0.1000, 0.2000, 0.3000])

Outputs: tensor([0.8808, 0.8581, 0.9089])
Output shape: torch.Size([3]) (one output per neuron)

Excellent! Our layer is complete and working!


## 6. Test with Single Input

Let's test our layer with a single input vector!


In [7]:
# Feed one input vector through a freshly initialised (random) layer
layer = Layer(num_inputs=2, num_neurons=3)

single_input = torch.tensor([1.0, 2.0])
single_output = layer.forward(single_input)

print(
    "Single input:",
    f"Input: {single_input}",
    f"Input shape: {single_input.shape}",
    "",
    f"Output: {single_output}",
    f"Output shape: {single_output.shape} (one output per neuron)",
    "",
    "Notice: One input vector produces one output vector with 3 values (one per neuron)!",
    sep="\n",
)


Single input:
Input: tensor([1., 2.])
Input shape: torch.Size([2])

Output: tensor([0.4309, 0.4839, 0.4158])
Output shape: torch.Size([3]) (one output per neuron)

Notice: One input vector produces one output vector with 3 values (one per neuron)!


## 7. Test with Batch of Inputs

Layers can process multiple inputs at once! This is called batch processing.


In [8]:
# Feed a batch through a freshly initialised (random) layer
layer = Layer(num_inputs=2, num_neurons=3)

# Three samples stacked as rows: shape (batch_size, num_inputs) = (3, 2)
batch_inputs = torch.tensor([
    [1.0, 2.0],
    [3.0, 4.0],
    [5.0, 6.0],
])

batch_outputs = layer.forward(batch_inputs)

print(
    "Batch of inputs:",
    f"Input shape: {batch_inputs.shape} (batch_size=3, num_inputs=2)",
    f"Inputs:\n{batch_inputs}",
    "",
    f"Output shape: {batch_outputs.shape} (batch_size=3, num_neurons=3)",
    f"Outputs:\n{batch_outputs}",
    "",
    "Notice: Layer processes all inputs in batch simultaneously!",
    "Each row of output corresponds to one input in batch.",
    "This is much faster than processing one at a time!",
    sep="\n",
)


Batch of inputs:
Input shape: torch.Size([3, 2]) (batch_size=3, num_inputs=2)
Inputs:
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

Output shape: torch.Size([3, 3]) (batch_size=3, num_neurons=3)
Outputs:
tensor([[0.5357, 0.4388, 0.4490],
        [0.5318, 0.3557, 0.4718],
        [0.5280, 0.2804, 0.4946]])

Notice: Layer processes all inputs in batch simultaneously!
Each row of output corresponds to one input in batch.
This is much faster than processing one at a time!


## 8. Try Different Activation Functions

Layers can use different activation functions! Let's add support for more activations.


In [9]:
# Enhanced layer with multiple activation functions
class Layer:
    """Fully connected layer with a selectable activation function.

    The forward pass computes ``activation(inputs @ weights.T + biases)``.

    Args:
        num_inputs: Number of input values each neuron receives.
        num_neurons: Number of neurons (and therefore outputs) in the layer.
        activation: One of 'sigmoid', 'relu', 'tanh' or 'linear'.
            'linear' (or None) applies no activation.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    # Supported activation names mapped to their element-wise functions
    _ACTIVATIONS = {
        'sigmoid': torch.sigmoid,
        'relu': torch.relu,
        'tanh': torch.tanh,
        'linear': lambda x: x,  # identity: no activation
    }

    def __init__(self, num_inputs, num_neurons, activation='sigmoid'):
        self.activation_name = activation
        # Fail fast on a misspelled name instead of silently building a
        # linear layer. Checked before any random draw so a rejected
        # construction does not advance the RNG.
        self._activation_fn()
        # Weight matrix: (num_neurons, num_inputs)
        # Each row is weights for one neuron
        self.weights = torch.randn(num_neurons, num_inputs) * 0.1
        # Bias vector: one per neuron
        self.biases = torch.randn(num_neurons) * 0.1

    def _activation_fn(self):
        """Return the function for ``self.activation_name`` or raise ValueError."""
        name = 'linear' if self.activation_name is None else self.activation_name
        # Only strings can be valid names; this also avoids hashing odd values
        fn = self._ACTIVATIONS.get(name) if isinstance(name, str) else None
        if fn is None:
            supported = ", ".join(repr(key) for key in self._ACTIVATIONS)
            raise ValueError(
                f"Unknown activation {self.activation_name!r}; "
                f"expected one of {supported}"
            )
        return fn

    def activate(self, x):
        """Apply the configured activation function element-wise to ``x``.

        Raises:
            ValueError: If ``activation_name`` was reassigned to an
                unsupported value.
        """
        # Resolved on every call so reassigning activation_name takes effect
        return self._activation_fn()(x)

    def forward(self, inputs):
        """Forward pass: weighted sums for all neurons, then the activation."""
        weighted_sum = inputs @ self.weights.T + self.biases
        return self.activate(weighted_sum)

# Run the same input through the layer once per activation name
print(
    "Testing different activation functions:",
    "Input = [2.0, 3.0], 2 neurons",
    "",
    sep="\n",
)

activations = ['sigmoid', 'relu', 'tanh', 'linear']

for act_name in activations:
    layer = Layer(num_inputs=2, num_neurons=2, activation=act_name)
    # Same fixed parameters every time, so only the activation differs
    layer.weights = torch.tensor([
        [1.0, 0.5],
        [0.5, 1.0],
    ])
    layer.biases = torch.tensor([0.0, 0.0])

    input_val = torch.tensor([2.0, 3.0])
    # Pre-activation values, recomputed here only so they can be displayed
    weighted_sum = torch.matmul(input_val, layer.weights.T) + layer.biases
    output = layer.forward(input_val)

    print(f"{act_name:8s}: weighted_sums = {weighted_sum} → outputs = {output}")


Testing different activation functions:
Input = [2.0, 3.0], 2 neurons

sigmoid : weighted_sums = tensor([3.5000, 4.0000]) → outputs = tensor([0.9707, 0.9820])
relu    : weighted_sums = tensor([3.5000, 4.0000]) → outputs = tensor([3.5000, 4.0000])
tanh    : weighted_sums = tensor([3.5000, 4.0000]) → outputs = tensor([0.9982, 0.9993])
linear  : weighted_sums = tensor([3.5000, 4.0000]) → outputs = tensor([3.5000, 4.0000])
