In [1]:
import numpy as np
import os
import sys

from tinytorch.core.layers import Linear, Dropout, Sequential
from tinytorch.core.tensor import Tensor
from tinytorch.core.activations import ReLU, Sigmoid
from tinytorch.core.autograd import enable_autograd

# Switch on tinytorch's autograd once, before any tensors or layers are created below.
# NOTE(review): presumably this patches Tensor operations globally - confirm in
# tinytorch.core.autograd, and confirm that calling it again on a cell re-run is harmless.
enable_autograd()

## Unit Test: Linear Layers

In [2]:
XAVIER_SCALE_FACTOR = 2.0

def test_unit_linear_layer():
    print("ðŸ”¬ Unit Test: Linear Layer...")
    layer = Linear(784, 256)

    # Test layer creation
    assert layer.in_features == 784
    assert layer.out_features == 256
    assert layer.weight.shape == (784, 256)
    assert layer.bias.shape == (256,)

    # Test xavier initialization (weights should be reasonably scaled)
    weight_std = np.std(layer.weight.data)
    expected_std = np.sqrt(XAVIER_SCALE_FACTOR / 784)
    assert 0.5 * expected_std < weight_std < 2.0 * expected_std, f'Weight std {weight_std} not close to Xavier {expected_std} '

    # Test bias initialization  (should be zeros)
    assert np.allclose(layer.bias.data, 0), 'Bias should be initialized to zeros'

    # Test forward pass
    x = Tensor(np.random.randn(32, 784)) # batch of 32 samples
    y = layer.forward(x)
    assert y.shape == (32, 256), f'Expected shapr (32, 256, got {y.shape}'
    
    # Test no bias option
    layer_no_bias = Linear(10, 5, bias= False)
    assert layer_no_bias.bias is None
    params = layer_no_bias.parameters()
    assert len(params) == 1 

    # Test parameters method
    params = layer.parameters()
    assert len(params) == 2
    assert params[0] is layer.weight
    assert params[1] is layer.bias


    print("âœ… Linear layer works correctly!")

if __name__ == '__main__':
    test_unit_linear_layer()

ðŸ”¬ Unit Test: Linear Layer...
âœ… Linear layer works correctly!


## Edge Case Tests: Linear

In [3]:
def test_edge_cases_linear():
    print("ðŸ”¬ Edge Case Tests: Linear Layer...")

    layer = Linear(10, 5)

    # Test single sample (should handle 2D input)
    x_2d = Tensor(np.random.randn(1, 10))
    y = layer.forward(x_2d)
    assert y.shape == (1, 5), 'should handle single cases'

    # Test zero batch size (edge case)
    x_empty = Tensor(np.random.randn(0, 10))
    y_empty = layer.forward(x_empty)
    assert y_empty.shape == (0, 5), 'should handle empty batch'

    # Test numerical stability with large weights
    layer_large = Linear(10, 5)
    layer_large.weight.data = np.ones((10, 5)) * 100
    x = Tensor(np.ones((1, 10)))
    y = layer_large.forward(x)
    assert not np.any(np.isnan(y.data)), 'should not produce NaN with large weights'
    assert not np.any(np.isinf(y.data)), 'should not produce inf with large weights'

    # Test with no bias
    layer_no_bias= Linear(10, 5, bias= False)
    x = Tensor(np.random.randn(4, 10))
    y = layer_no_bias.forward(x)
    assert y.shape == (4, 5), 'should work without bias'

    print("âœ… Edge cases handled correctly!")
if __name__ =='__main__':
    test_edge_cases_linear()

ðŸ”¬ Edge Case Tests: Linear Layer...
âœ… Edge cases handled correctly!


## Parameter Collections Tests: Linear

In [4]:
def test_parameter_collection_linear():
    print("ðŸ”¬ Parameter Collection Test: Linear Layer...")

    layer = Linear(10, 5)

    # verify parameter collection works
    params = layer.parameters()
    assert len(params) == 2, 'should return 2 parameters (weight and bias)'
    assert params[0].shape == (10, 5), 'first param should be weight'
    assert params[1].shape == (5,), 'second parameter should be bias'

    # Test layer without bias
    layer_no_bias = Linear(10, 5, bias= False)
    params_no_bias = layer_no_bias.parameters()
    assert len(params_no_bias) == 1, 'should return 1 parameter (weight only)'
    
    print("âœ… Parameter collection works correctly!")

if __name__ == '__main__':
    test_parameter_collection_linear()

ðŸ”¬ Parameter Collection Test: Linear Layer...
âœ… Parameter collection works correctly!


## Unit test: Dropout Layer

In [5]:
def test_unit_dropout_layer():
    print("ðŸ”¬ Unit Test: Dropout Layer...")

    dropout = Dropout(0.5)
    assert dropout.p == 0.5

    # test inference mode, should pass unchanged
    x = Tensor([1, 2, 3, 4])
    y_inference = dropout.forward(x, training= False)
    assert np.array_equal(x.data, y_inference.data), 'Inference should pass through unchanged'

    # Test training mode with zero dropout (should pass through unchanged)
    dropout_zero = Dropout(0.0)
    y_zero = dropout_zero.forward(x, training= True)
    assert np.array_equal(x.data, y_zero.data), 'Zero dropout should pass through unchanged'

    dropout_full = Dropout(1.0)
    y_full = dropout_full.forward(x, training= True)
    assert np.allclose(y_full.data, 0), 'Full dropout should zero everything'

    # Test training mode with partial dropout
    np.random.seed(42)
    x_large = Tensor(np.ones((1000,)))
    y_train = dropout.forward(x_large, training= True)

    # count non zero elements
    non_zero_count = np.count_nonzero(y_train.data)
    expected = 500
    std_error = np.sqrt(1000 * 0.5 * 0.5)
    lower_bound = expected - 3 * std_error
    upper_bound = expected + 3 * std_error
    assert lower_bound < non_zero_count < upper_bound, f'Expected {expected}Â±{3*std-error:.0f} survivors, got {non_zero_count}'

    # Test sclaing (surviving element should be scaled y 1 / (1- p) = 2.0)
    surviving_values = y_train.data[y_train.data != 0]
    expected_value = 2.0 
    assert np.allclose(surviving_values, expected_value), f'Surviving values should be {expected_value}'

    # Test no parameters
    params = dropout.parameters()
    assert len(params) == 0, 'Dropout should have no parameters'

    # Test invalid probabilities
    try:
        Dropout(-0.1)
        assert False, 'Should raise ValueError for negative probability'
    except ValueError:
        pass

    try:
        Dropout(1.1)
        assert False, 'Should raise ValueEror for probability > 1'
    except ValueError:
        pass

    print("âœ… Dropout layer works correctly!")

if __name__=='__main__':
    test_unit_dropout_layer()

ðŸ”¬ Unit Test: Dropout Layer...
âœ… Dropout layer works correctly!


## Integration: Bringing It Together

In [6]:
def analyze_layer_memory(layer_configs=None, hidden_sizes=None, bytes_per_param=4):
    """Print parameter-memory estimates for Linear layers and a 3-layer MLP.

    The numbers are pure arithmetic on the layer dimensions; no layer is
    instantiated and nothing is measured.

    Args:
        layer_configs: iterable of ``(in_features, out_features)`` pairs for
            the per-layer table. ``None`` selects the four default
            configurations (784x256, 256x256, 256x10, 2048x2048).
        hidden_sizes: iterable of hidden widths for the
            784 -> hidden -> hidden // 2 -> 10 MLP table. ``None`` selects
            128, 256, 512, 1024 and 2048.
        bytes_per_param: storage size of one parameter in bytes
            (default 4, i.e. float32).

    Returns:
        None. The tables are written to stdout.
    """
    if layer_configs is None:
        layer_configs = [
            (784, 256),   # MNIST input to hidden
            (256, 256),   # hidden to hidden
            (256, 10),    # hidden to output
            (2048, 2048), # large hidden
        ]
    if hidden_sizes is None:
        hidden_sizes = [128, 256, 512, 1024, 2048]

    bytes_per_kb = 1024
    bytes_per_mb = 1024 * 1024
    mlp_inputs, mlp_outputs = 784, 10  # fixed ends of the MLP in the second table

    print("ðŸ“Š Analyzing Layer Memory Usage...")

    print("\nLinear Layer Memory Analysis:")
    print("Configuration â†’ Weight Memory â†’ Bias Memory â†’ Total Memory")

    for in_feat, out_feat in layer_configs:
        # One weight per (input, output) pair plus one bias per output.
        weight_memory = in_feat * out_feat * bytes_per_param
        bias_memory = out_feat * bytes_per_param
        total_memory = weight_memory + bias_memory

        print(f"({in_feat:4d}, {out_feat:4d}) â†’ {weight_memory/bytes_per_kb:7.1f} KB â†’ {bias_memory/bytes_per_kb:6.1f} KB â†’ {total_memory/bytes_per_kb:7.1f} KB")

    # Analyze multi-layer memory scaling
    print("\nðŸ’¡ Multi-layer Model Memory Scaling:")

    for hidden_size in hidden_sizes:
        half_hidden = hidden_size // 2
        # Each term is weights + biases of one layer.
        layer1_params = mlp_inputs * hidden_size + hidden_size
        layer2_params = hidden_size * half_hidden + half_hidden
        layer3_params = half_hidden * mlp_outputs + mlp_outputs

        total_params = layer1_params + layer2_params + layer3_params
        memory_mb = total_params * bytes_per_param / bytes_per_mb

        print(f"Hidden={hidden_size:4d}: {total_params:7,} params = {memory_mb:5.1f} MB")

# Run the analysis when this cell/script is executed directly.

if __name__ == '__main__':
    analyze_layer_memory()

ðŸ“Š Analyzing Layer Memory Usage...

Linear Layer Memory Analysis:
Configuration â†’ Weight Memory â†’ Bias Memory â†’ Total Memory
( 784,  256) â†’   784.0 KB â†’    1.0 KB â†’   785.0 KB
( 256,  256) â†’   256.0 KB â†’    1.0 KB â†’   257.0 KB
( 256,   10) â†’    10.0 KB â†’    0.0 KB â†’    10.0 KB
(2048, 2048) â†’ 16384.0 KB â†’    8.0 KB â†’ 16392.0 KB

ðŸ’¡ Multi-layer Model Memory Scaling:
Hidden= 128: 109,386 params =   0.4 MB
Hidden= 256: 235,146 params =   0.9 MB
Hidden= 512: 535,818 params =   2.0 MB
Hidden=1024: 1,333,770 params =   5.1 MB
Hidden=2048: 3,716,106 params =  14.2 MB


In [7]:
def analyze_layer_performance(in_features=784, out_features=256,
                              batch_sizes=(1, 32, 128, 512),
                              warmup=10, iterations=100):
    """Print an operation-count table and wall-clock timings for Linear.forward.

    The first table is arithmetic only: ``batch * in * out`` for the matrix
    multiply and ``batch * out`` for the bias add. The second table times
    ``iterations`` forward passes per batch size with ``time.perf_counter``
    after ``warmup`` untimed passes.

    Args:
        in_features: input width of the benchmarked layer (default 784).
        out_features: output width of the benchmarked layer (default 256).
        batch_sizes: iterable of batch sizes to report (default 1, 32, 128, 512).
        warmup: untimed forward passes run before each measurement (default 10).
        iterations: timed forward passes per batch size (default 100).

    Returns:
        None. Both tables and the closing notes are written to stdout.

    Raises:
        ValueError: if ``iterations`` is below 1 or ``warmup`` is negative.
    """
    import time  # kept function-local, as in the original cell

    if iterations < 1:
        raise ValueError("iterations must be at least 1")
    if warmup < 0:
        raise ValueError("warmup must be non-negative")

    print("ðŸ“Š Analyzing Layer Computational Complexity...")

    # Materialize once: the sizes are iterated twice (operation table, then timing).
    batch_sizes = list(batch_sizes)
    layer = Linear(in_features, out_features)

    print("\nLinear Layer FLOPs Analysis:")
    print("Batch Size â†’ Matrix Multiply FLOPs â†’ Bias Add FLOPs â†’ Total FLOPs")

    for batch_size in batch_sizes:
        # (batch, in) @ (in, out): counted as batch * in * out operations.
        # The dimensions come from the same variables used to build the layer,
        # so the table cannot drift away from the layer being timed.
        matmul_flops = batch_size * in_features * out_features
        # Bias addition: one add per output element.
        bias_flops = batch_size * out_features
        total_flops = matmul_flops + bias_flops

        print(f"{batch_size:10d} â†’ {matmul_flops:15,} â†’ {bias_flops:13,} â†’ {total_flops:11,}")

    # Timing measurements
    print("\nLinear Layer Timing Analysis:")
    print("Batch Size â†’ Time (ms) â†’ Throughput (samples/sec)")

    for batch_size in batch_sizes:
        x = Tensor(np.random.randn(batch_size, in_features))

        # Warm up so one-time costs are not included in the measurement.
        for _ in range(warmup):
            layer.forward(x)

        start = time.perf_counter()
        for _ in range(iterations):
            layer.forward(x)
        elapsed = time.perf_counter() - start

        time_per_forward = (elapsed / iterations) * 1000  # seconds -> milliseconds
        # Guard against a zero reading from a coarse clock on trivially small runs.
        throughput = (batch_size * iterations) / elapsed if elapsed > 0 else float("inf")

        print(f"{batch_size:10d} â†’ {time_per_forward:8.3f} ms â†’ {throughput:12,.0f} samples/sec")

    print("\nðŸ’¡ Key Insights:")
    print("ðŸš€ Linear layer complexity: O(batch_size Ã— in_features Ã— out_features)")
    print("ðŸš€ Memory grows linearly with batch size, quadratically with layer width")
    print("ðŸš€ Dropout adds minimal computational overhead (element-wise operations)")
    print("ðŸš€ Larger batches amortize overhead, improving throughput efficiency")

# Run the analysis when this cell/script is executed directly.
if __name__ == '__main__':
    analyze_layer_performance()

ðŸ“Š Analyzing Layer Computational Complexity...

Linear Layer FLOPs Analysis:
Batch Size â†’ Matrix Multiply FLOPs â†’ Bias Add FLOPs â†’ Total FLOPs
         1 â†’         200,704 â†’           256 â†’     200,960
        32 â†’       6,422,528 â†’         8,192 â†’   6,430,720
       128 â†’      25,690,112 â†’        32,768 â†’  25,722,880
       512 â†’     102,760,448 â†’       131,072 â†’ 102,891,520

Linear Layer Timing Analysis:
Batch Size â†’ Time (ms) â†’ Throughput (samples/sec)
         1 â†’    0.962 ms â†’        1,040 samples/sec
        32 â†’   31.517 ms â†’        1,015 samples/sec
       128 â†’  101.279 ms â†’        1,264 samples/sec
       512 â†’  404.145 ms â†’        1,267 samples/sec

ðŸ’¡ Key Insights:
ðŸš€ Linear layer complexity: O(batch_size Ã— in_features Ã— out_features)
ðŸš€ Memory grows linearly with batch size, quadratically with layer width
ðŸš€ Dropout adds minimal computational overhead (element-wise operations)
ðŸš€ Larger batches amortize overhe

In [8]:
# Demo: a 784 -> 256 -> 128 -> 10 network assembled with Sequential.
# ReLU follows each hidden Linear, Dropout uses p=0.5 then p=0.3, and the
# final Linear is followed by Sigmoid.
# NOTE(review): whether the Dropout layers are active inside `model(x)` depends on the
# default training flag used by Sequential/Dropout - confirm before relying on `output`.
model = Sequential(
    Linear(784, 256),
    ReLU(),
    Dropout(0.5),
    Linear(256, 128),
    ReLU(),
    Dropout(0.3),
    Linear(128, 10), 
    Sigmoid()
)
# Unseeded random input of 30 rows x 784 columns; values differ on every run.
x = Tensor(np.random.random((30, 784)))
# Forward pass through the whole stack (presumably yields shape (30, 10) - not asserted here).
output = model(x)

## Module Integration Testing

In [9]:
def test_module():
    print("ðŸ§ª RUNNING MODULE INTEGRATION TEST")
    print("=" * 50)

    print("âœ… Multi-layer network integration works!")
    print("Running unit tests...")
    test_unit_linear_layer()
    test_edge_cases_linear()
    test_parameter_collection_linear()
    test_unit_dropout_layer()

    print("\nRunning integration scenarios...")

    # Test realistic neural network construction with manual composition
    print("ðŸ”¬ Integration Test: Multi-layer Network...")

    ReLU_class = ReLU
    # Build individual layers for manual composition
    layer1 = Linear(784, 128)
    activation1 = ReLU_class()
    dropout1 = Dropout(0.5)
    layer2 = Linear(128, 64)
    activation2 = ReLU_class()
    dropout2 = Dropout(0.3)
    layer3 = Linear(64, 10)

    # Test end-to-end forward pass with manual composition
    batch_size = 16
    x = Tensor(np.random.randn(batch_size, 784))

    # Manual forward pass
    x = layer1.forward(x)
    x = activation1.forward(x)
    x = dropout1.forward(x)
    x = layer2.forward(x)
    x = activation2.forward(x)
    x = dropout2.forward(x)
    output = layer3.forward(x)

    assert output.shape == (batch_size, 10), f"Expected output shape ({batch_size}, 10), got {output.shape}"

    # Test parameter counting from individual layers
    all_params = layer1.parameters() + layer2.parameters() + layer3.parameters()
    expected_params = 6  # 3 weights + 3 biases from 3 Linear layers
    assert len(all_params) == expected_params, f"Expected {expected_params} parameters, got {len(all_params)}"

    # Test individual layer functionality
    test_x = Tensor(np.random.randn(4, 784))
    # Test dropout in training vs inference
    dropout_test = Dropout(0.5)
    train_output = dropout_test.forward(test_x, training=True)
    infer_output = dropout_test.forward(test_x, training=False)
    assert np.array_equal(test_x.data, infer_output.data), "Inference mode should pass through unchanged"

    print("âœ… Multi-layer network integration works!")

    print("\n" + "=" * 50)
    print("ðŸŽ‰ ALL TESTS PASSED! Module ready for export.")
    print("Run: tito module complete 03_layers")

if __name__=='__main__':
    test_module()
    

ðŸ§ª RUNNING MODULE INTEGRATION TEST
âœ… Multi-layer network integration works!
Running unit tests...
ðŸ”¬ Unit Test: Linear Layer...
âœ… Linear layer works correctly!
ðŸ”¬ Edge Case Tests: Linear Layer...
âœ… Edge cases handled correctly!
ðŸ”¬ Parameter Collection Test: Linear Layer...
âœ… Parameter collection works correctly!
ðŸ”¬ Unit Test: Dropout Layer...
âœ… Dropout layer works correctly!

Running integration scenarios...
ðŸ”¬ Integration Test: Multi-layer Network...
âœ… Multi-layer network integration works!

ðŸŽ‰ ALL TESTS PASSED! Module ready for export.
Run: tito module complete 03_layers


In [10]:
if __name__ == "__main__":
    # Final end-of-module run: banner, full test suite, then both analyses.
    print("=" * 70, "MODULE 03: LAYERS - COMPREHENSIVE VALIDATION", "=" * 70, sep="\n")

    # Unit tests plus the integration scenario.
    test_module()

    print("\n" + "=" * 70, "SYSTEMS ANALYSIS", "=" * 70, sep="\n")

    # Memory table first, a blank gap, then the timing table.
    analyze_layer_memory()
    print("\n")
    analyze_layer_performance()

    print("\n" + "=" * 70, "âœ… MODULE 03 COMPLETE!", "=" * 70, sep="\n")

MODULE 03: LAYERS - COMPREHENSIVE VALIDATION
ðŸ§ª RUNNING MODULE INTEGRATION TEST
âœ… Multi-layer network integration works!
Running unit tests...
ðŸ”¬ Unit Test: Linear Layer...
âœ… Linear layer works correctly!
ðŸ”¬ Edge Case Tests: Linear Layer...
âœ… Edge cases handled correctly!
ðŸ”¬ Parameter Collection Test: Linear Layer...
âœ… Parameter collection works correctly!
ðŸ”¬ Unit Test: Dropout Layer...
âœ… Dropout layer works correctly!

Running integration scenarios...
ðŸ”¬ Integration Test: Multi-layer Network...
âœ… Multi-layer network integration works!

ðŸŽ‰ ALL TESTS PASSED! Module ready for export.
Run: tito module complete 03_layers

SYSTEMS ANALYSIS
ðŸ“Š Analyzing Layer Memory Usage...

Linear Layer Memory Analysis:
Configuration â†’ Weight Memory â†’ Bias Memory â†’ Total Memory
( 784,  256) â†’   784.0 KB â†’    1.0 KB â†’   785.0 KB
( 256,  256) â†’   256.0 KB â†’    1.0 KB â†’   257.0 KB
( 256,   10) â†’    10.0 KB â†’    0.0 KB â†’    10.0 KB
(2048, 2048) â†’ 16384.0 KB