<span style=' font-size:20px'>Arithmetic Computation Testing</span>

In [1]:
from torch.core.tensor import Tensor
import numpy as np

## Elementwise Arithemtic Operations

In [61]:
def test_unit_arithmetic_operations():
    print("ðŸ§ª Unit Test: Arithmetic Operations...")

    # Test tensor + tensor
    a = Tensor([1, 2, 3])
    b = Tensor([4, 5, 6])
    result = a + b
    assert np.array_equal(result.data, np.array([5, 7, 9], dtype=np.float32))

    # Test tensor + scaler
    result = a + 10
    assert np.array_equal(result.data, np.array([11, 12, 13], dtype=np.float32))

    # testing broadcasting with different shapes(matrix + vector)
    matrix = Tensor([[1, 2], [3, 4]])
    vector = Tensor([10, 20])
    result = matrix + vector
    expected = np.array([[11, 22], [13, 24]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test subtraction (data centering)
    result = b - a
    assert np.array_equal(result.data, np.array([3, 3, 3], dtype=np.float32))

    # Test multiplication (scaling)
    result = a * 2
    assert np.array_equal(result.data, np.array([2, 4, 6], dtype=np.float32))

    # Test division 
    normalized = (a - 2) / 2 # center and scale
    expected = np.array([-0.5, 0.0, 0.5], dtype=np.float32)
    assert np.allclose(normalized.data, expected)
    
    print("âœ… Arithmetic operations work correctly!")


if __name__ == "__main__":
    test_unit_arithmetic_operations()

ðŸ§ª Unit Test: Arithmetic Operations...
âœ… Arithmetic operations work correctly!


## Matrix Multiplcation The Heart of Neural Operations

In [9]:
def test_unit_matrix_multiplication():
    """ðŸ§ª Test matrix multiplication operations."""
    print("ðŸ§ª Unit Test: Matrix Multiplication...")

    # test 2 x 2 matrix multiplication
    a = Tensor([[1, 2], [3, 4]])
    b = Tensor([[5, 6], [7, 8]])
    result = a.matmul(b)
    expected = np.array([[19, 22], [43, 50]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test rectangular matrices(common in nueral networks)
    c = Tensor([[1, 2 , 3], [4, 5, 6]])
    d = Tensor([[7, 8], [9, 10], [11, 12]])
    result = c.matmul(d)
    expected = np.array([[58, 64], [139, 154]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # matrix vector multiplication
    matrix = Tensor([[1, 2, 3], [4, 5, 6]])
    vector = Tensor([1, 2, 3])
    result = matrix.matmul(vector)
    expected = np.array([14, 32])
    assert np.array_equal(result.data, expected)

    # Test shape validation - should raise clear errors
    try:
        incompatible_a = Tensor([[1, 2]])
        incompatible_b = Tensor([[1], [2], [3]])
        incompatible_a.matmul(incompatible_b)
        assert False, "Should have raised Value Error for incompatible shapes"
    except ValueError as e:
        assert "Inner dimensions must match" in str(e)
        assert "2 â‰  3" in str(e)

    print("âœ… Shape manipulation works correctly!")
    

if __name__ == '__main__':
    test_unit_matrix_multiplication()

ðŸ§ª Unit Test: Matrix Multiplication...
âœ… Shape manipulation works correctly!


## Shape Manipulation Tests

In [62]:
def test_unit_matrix_multiplication():
    """ðŸ§ª Test reshape and transpose operations."""
    print("ðŸ§ª Unit Test: Shape Manipulation...")

    # Test basic reshape (flatten => matrix)
    tensor = Tensor([1, 2, 3, 4, 5, 6])
    reshaped = tensor.reshape(2, 3)
    assert reshaped.shape == (2, 3)
    expected = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    assert np.array_equal(reshaped.data, expected)

    # Test reshape with tuple (aternative calling style)
    reshaped2 = tensor.reshape((3, 2))
    assert reshaped2.shape == (3, 2)
    expected = np.array([[1, 2], [3, 4], [5, 6]])
    assert np.array_equal(reshaped2.data, expected)

    # Test reshape with -1 (automatic dimension inference)
    auto_reshaped = tensor.reshape(2, -1)
    assert auto_reshaped.shape == (2, 3)

    # Test reshape validation - should raise error for incompatible sizes
    try:
        tensor.reshape(2, 2)
        assert False, 'Should have raised ValueError'
    except ValueError as e:
        assert "Total elements must match" in str(e)
        assert "6 â‰  4" in str(e)

    # Test matrix transpose (most common case)
    matrix = Tensor([[1, 2, 3], [4, 5, 6]])
    transposed = matrix.transpose()
    assert transposed.shape == (3, 2)
    expected = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.float32)
    assert np.array_equal(transposed.data, expected)

    # test 1D transpose (should be identity)
    vector = Tensor([1, 2, 3])
    vector_t = vector.transpose()
    assert np.array_equal(vector.data, vector_t.data)

    # test specific dimensions transpose
    tensor_3d = Tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) 
    swapped = tensor_3d.transpose(0, 2)
    assert swapped.shape == (2, 2, 2)

    # Test neural network reshape pattern (flatten for MLP)
    batch_images = Tensor(np.random.randn(2, 3, 4)) # (batch= 2, height= 3, width = 4)
    flattened = batch_images.reshape(2, -1) # (batch=2, features= 12)
    assert flattened.shape == (2, 12)
        
    print("âœ… Shape manipulation works correctly!")

if __name__=='__main__':
    test_unit_matrix_multiplication()

ðŸ§ª Unit Test: Shape Manipulation...
âœ… Shape manipulation works correctly!


## Reduction Operations: Aggregation Operations

In [64]:
def test_unit_shape_manipulation():
    """ðŸ§ª Test reduction operations."""
    print("ðŸ§ª Unit Test: Reduction Operations...")

    matrix = Tensor([[1, 2, 3], [4, 5, 6]])
    # test sum all elements
    total = matrix.sum()
    assert total.data == 21.0
    assert total.shape == ()

    # Test sum along axis 0 (colums) - batch dimension reduction 
    col_sum = matrix.sum(axis= 0)
    expected_col = np.array([5, 7, 9], dtype= np.float32)
    assert np.array_equal(col_sum.data, expected_col)
    assert col_sum.shape == (3, )

    # Test sum along axis 1 (row) - feature dimesnion reduction
    row_sum= matrix.sum(1)
    expected_row = np.array([6, 15], dtype=np.float32)
    assert row_sum.shape == (2, )

    # Test mean (average loss computation)
    avg = matrix.mean()
    assert np.isclose(avg.data, 3.5)
    assert avg.shape == ()

    # Test mean along axis (batch normalization pattern)
    col_mean = matrix.mean(axis=0)
    expected_mean = np.array([2.5, 3.5, 4.5], dtype=np.float32)  # [5/2, 7/2, 9/2]
    assert np.allclose(col_mean.data, expected_mean)

    # Test max (finding best predictions)
    maximum = matrix.max()
    assert maximum.data == 6.0
    assert maximum.shape == ()

    # Test max along axis (argmax-like operation)
    row_max = matrix.max(axis=1)
    expected_max = np.array([3, 6], dtype=np.float32)  # [max(1,2,3), max(4,5,6)]
    assert np.array_equal(row_max.data, expected_max)

    # Test keepdims (important for broadcasting)
    sum_keepdims = matrix.sum(axis=1, keepdims=True)
    assert sum_keepdims.shape == (2, 1) 
    expected_keepdims = np.array([[6], [15]], dtype=np.float32)
    assert np.array_equal(sum_keepdims.data, expected_keepdims)

    # Test 3D reduction (simulating global average pooling)
    tensor_3d = Tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # (2, 2, 2)
    spatial_mean = tensor_3d.mean(axis=(1, 2))  # Average across spatial dimensions
    assert spatial_mean.shape == (2,)  # One value per batch item
    

    print("âœ… Reduction operations work correctly!")

if __name__ == '__main__':
    test_unit_shape_manipulation()

ðŸ§ª Unit Test: Reduction Operations...
âœ… Reduction operations work correctly!


In [60]:
BYTES_PER_FLOAT32 = 4
KB_TO_BYTES = 1024
MB_TO_BYTES = 1024 * 1024

def analyze_memory_layout():
    """ðŸ“Š Demonstrate cache effects with row vs column access patterns."""
    print("ðŸ“Š Analyzing Memory Access Patterns...")
    print("=" * 60)
    
    import time 
    
    size = 2000
    matrix = Tensor(np.random.rand(size, size))

    import time
    print(f'\nTesting with {size}x{size} matrix ({matrix.size * BYTES_PER_FLOAT32 / MB_TO_BYTES:.1f}MB)')
    print('-'*60)
    print(f'Test 1: Row-wise Access (Cache friendly)')
    start = time.time()
    row_sums = []
    for i in range(size):
        row_sum = matrix.data[i, :].sum() # access entire row sequentially
        row_sums.append(row_sum)
    row_time = time.time() - start
    print(f"  Time: {row_time*1000:.1f}ms")
    print(f"  Access pattern: sequential (follows memory layout)")

    print(f'\nTest 2: Column-wise Access (Cache unfriendly)')
    start = time.time()
    col_sums = []
    for i in range(size):
        col_sum = matrix.data[:, i].sum()
        col_sums.append(col_sum)
    col_time = time.time() - start
    print(f"  Time: {row_time*1000:.1f}ms")
    print(f"  Access pattern: Strided (jumps {size * BYTES_PER_FLOAT32} bytes per element)")
    
    # Calculate slowdown
    slowdown = col_time / row_time
    print("\n" + "=" * 60)
    print(f"ðŸ“Š PERFORMANCE IMPACT:")
    print(f"   Slowdown factor: {slowdown:.2f}Ã— ({col_time/row_time:.1f}Ã— slower)")
    print(f"   Cache misses cause {(slowdown-1)*100:.0f}% performance loss")

    # Educational insights
    print("\nðŸ’¡ KEY INSIGHTS:")
    print(f"   1. Memory layout matters: Row-major (C-style) storage is sequential")
    print(f"   2. Cache lines are ~64 bytes: Row access loads nearby elements \"for free\"")
    print(f"   3. Column access misses cache: Must reload from DRAM every time")
    print(f"   4. This is O(n) algorithm but {slowdown:.1f}Ã— different wall-clock time!")

    print("\nðŸš€ REAL-WORLD IMPLICATIONS:")
    print(f"   â€¢ CNNs use NCHW format (channels sequential) for cache efficiency")
    print(f"   â€¢ Matrix multiplication optimized with blocking (tile into cache-sized chunks)")
    print(f"   â€¢ Transpose is expensive ({slowdown:.1f}Ã—) because it changes memory layout")
    print(f"   â€¢ This is why GPU frameworks obsess over memory coalescing")

    print("\n" + "=" * 60)


if __name__ == '__main__':
    analyze_memory_layout()

ðŸ“Š Analyzing Memory Access Patterns...

Testing with 2000x2000 matrix (15.3MB)
------------------------------------------------------------
Test 1: Row-wise Access (Cache friendly)
  Time: 17.0ms
  Access pattern: sequential (follows memory layout)

Test 2: Column-wise Access (Cache unfriendly)
  Time: 17.0ms
  Access pattern: Strided (jumps 8000 bytes per element)

ðŸ“Š PERFORMANCE IMPACT:
   Slowdown factor: 2.93Ã— (2.9Ã— slower)
   Cache misses cause 193% performance loss

ðŸ’¡ KEY INSIGHTS:
   1. Memory layout matters: Row-major (C-style) storage is sequential
   2. Cache lines are ~64 bytes: Row access loads nearby elements "for free"
   3. Column access misses cache: Must reload from DRAM every time
   4. This is O(n) algorithm but 2.9Ã— different wall-clock time!

ðŸš€ REAL-WORLD IMPLICATIONS:
   â€¢ CNNs use NCHW format (channels sequential) for cache efficiency
   â€¢ Matrix multiplication optimized with blocking (tile into cache-sized chunks)
   â€¢ Transpose is expensive (

## Bringing It all Together

In [67]:
def test_module():
    """ðŸ§ª Module Test: Complete Integration

    Comprehensive test of entire module functionality.

    This final test runs before module summary to ensure:
    - All unit tests pass
    - Functions work together correctly
    - Module is ready for integration with TinyTorch
    """
    print("ðŸ§ª RUNNING MODULE INTEGRATION TEST")
    print("=" * 50)

    # Run all unit tests
    print("Running unit tests...")
    # test_unit_tensor_creation()
    # test_unit_arithmetic_operations()
    # test_unit_matrix_multiplication()
    # test_unit_shape_manipulation()
    # test_unit_reduction_operations()

    print("\nRunning integration scenarios...")

    # Test realistic neural network computation
    print("ðŸ§ª Integration Test: Two-Layer Neural Network...")

    # Create input data (2 samples, 3 features)
    x = Tensor([[1, 2, 3], [4, 5, 6]])

    # First layer: 3 inputs â†’ 4 hidden units
    W1 = Tensor([[0.1, 0.2, 0.3, 0.4],
                 [0.5, 0.6, 0.7, 0.8],
                 [0.9, 1.0, 1.1, 1.2]])
    b1 = Tensor([0.1, 0.2, 0.3, 0.4])

    # Forward pass: hidden = xW1 + b1
    hidden = x.matmul(W1) + b1
    assert hidden.shape == (2, 4), f"Expected (2, 4), got {hidden.shape}"

    # Second layer: 4 hidden â†’ 2 outputs
    W2 = Tensor([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]])
    b2 = Tensor([0.1, 0.2])

    # Output layer: output = hiddenW2 + b2
    output = hidden.matmul(W2) + b2
    assert output.shape == (2, 2), f"Expected (2, 2), got {output.shape}"

    # Verify data flows correctly (no NaN, reasonable values)
    assert not np.isnan(output.data).any(), "Output contains NaN values"
    assert np.isfinite(output.data).all(), "Output contains infinite values"

    print("âœ… Two-layer neural network computation works!")

    # Test complex shape manipulations
    print("ðŸ§ª Integration Test: Complex Shape Operations...")
    data = Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

    # Reshape to 3D tensor (simulating batch processing)
    tensor_3d = data.reshape(2, 2, 3)  # (batch=2, height=2, width=3)
    assert tensor_3d.shape == (2, 2, 3)

    # Global average pooling simulation
    pooled = tensor_3d.mean(axis=(1, 2))  # Average across spatial dimensions
    assert pooled.shape == (2,), f"Expected (2,), got {pooled.shape}"

    # Flatten for MLP
    flattened = tensor_3d.reshape(2, -1)  # (batch, features)
    assert flattened.shape == (2, 6)

    # Transpose for different operations
    transposed = tensor_3d.transpose()  # Should transpose last two dims
    assert transposed.shape == (2, 3, 2)

    print("âœ… Complex shape operations work!")

    # Test broadcasting edge cases
    print("ðŸ§ª Integration Test: Broadcasting Edge Cases...")

    # Scalar broadcasting
    scalar = Tensor(5.0)
    vector = Tensor([1, 2, 3])
    result = scalar + vector  # Should broadcast scalar to vector shape
    expected = np.array([6, 7, 8], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Matrix + vector broadcasting
    matrix = Tensor([[1, 2], [3, 4]])
    vec = Tensor([10, 20])
    result = matrix + vec
    expected = np.array([[11, 22], [13, 24]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    print("âœ… Broadcasting edge cases work!")

    print("\n" + "=" * 50)
    print("ðŸŽ‰ ALL TESTS PASSED! Module ready for export.")
    print("Run: tito module complete 01_tensor")

# Run comprehensive module test
if __name__ == "__main__":
    test_module()

ðŸ§ª RUNNING MODULE INTEGRATION TEST
Running unit tests...

Running integration scenarios...
ðŸ§ª Integration Test: Two-Layer Neural Network...
âœ… Two-layer neural network computation works!
ðŸ§ª Integration Test: Complex Shape Operations...
âœ… Complex shape operations work!
ðŸ§ª Integration Test: Broadcasting Edge Cases...
âœ… Broadcasting edge cases work!

ðŸŽ‰ ALL TESTS PASSED! Module ready for export.
Run: tito module complete 01_tensor
